aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c74
-rw-r--r--net/ipv4/ah4.c295
-rw-r--r--net/ipv4/cipso_ipv4.c2
-rw-r--r--net/ipv4/datagram.c18
-rw-r--r--net/ipv4/devinet.c155
-rw-r--r--net/ipv4/fib_frontend.c26
-rw-r--r--net/ipv4/fib_hash.c25
-rw-r--r--net/ipv4/fib_rules.c2
-rw-r--r--net/ipv4/fib_trie.c26
-rw-r--r--net/ipv4/icmp.c13
-rw-r--r--net/ipv4/igmp.c45
-rw-r--r--net/ipv4/inet_connection_sock.c21
-rw-r--r--net/ipv4/inet_diag.c28
-rw-r--r--net/ipv4/inet_hashtables.c36
-rw-r--r--net/ipv4/inet_timewait_sock.c14
-rw-r--r--net/ipv4/inetpeer.c5
-rw-r--r--net/ipv4/ip_fragment.c7
-rw-r--r--net/ipv4/ip_gre.c62
-rw-r--r--net/ipv4/ip_input.c2
-rw-r--r--net/ipv4/ip_output.c15
-rw-r--r--net/ipv4/ip_sockglue.c12
-rw-r--r--net/ipv4/ipip.c71
-rw-r--r--net/ipv4/ipmr.c29
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c8
-rw-r--r--net/ipv4/raw.c33
-rw-r--r--net/ipv4/route.c5
-rw-r--r--net/ipv4/syncookies.c30
-rw-r--r--net/ipv4/tcp.c15
-rw-r--r--net/ipv4/tcp_input.c32
-rw-r--r--net/ipv4/tcp_ipv4.c119
-rw-r--r--net/ipv4/tcp_minisocks.c12
-rw-r--r--net/ipv4/tcp_output.c22
-rw-r--r--net/ipv4/tcp_probe.c11
-rw-r--r--net/ipv4/tcp_timer.c12
-rw-r--r--net/ipv4/udp.c470
-rw-r--r--net/ipv4/udplite.c5
36 files changed, 1137 insertions, 620 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 57737b8d1711..7d12c6a9b19b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -174,12 +174,12 @@ static int inet_autobind(struct sock *sk)
174 /* We may need to bind the socket. */ 174 /* We may need to bind the socket. */
175 lock_sock(sk); 175 lock_sock(sk);
176 inet = inet_sk(sk); 176 inet = inet_sk(sk);
177 if (!inet->num) { 177 if (!inet->inet_num) {
178 if (sk->sk_prot->get_port(sk, 0)) { 178 if (sk->sk_prot->get_port(sk, 0)) {
179 release_sock(sk); 179 release_sock(sk);
180 return -EAGAIN; 180 return -EAGAIN;
181 } 181 }
182 inet->sport = htons(inet->num); 182 inet->inet_sport = htons(inet->inet_num);
183 } 183 }
184 release_sock(sk); 184 release_sock(sk);
185 return 0; 185 return 0;
@@ -262,7 +262,8 @@ static inline int inet_netns_ok(struct net *net, int protocol)
262 * Create an inet socket. 262 * Create an inet socket.
263 */ 263 */
264 264
265static int inet_create(struct net *net, struct socket *sock, int protocol) 265static int inet_create(struct net *net, struct socket *sock, int protocol,
266 int kern)
266{ 267{
267 struct sock *sk; 268 struct sock *sk;
268 struct inet_protosw *answer; 269 struct inet_protosw *answer;
@@ -325,7 +326,7 @@ lookup_protocol:
325 } 326 }
326 327
327 err = -EPERM; 328 err = -EPERM;
328 if (answer->capability > 0 && !capable(answer->capability)) 329 if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
329 goto out_rcu_unlock; 330 goto out_rcu_unlock;
330 331
331 err = -EAFNOSUPPORT; 332 err = -EAFNOSUPPORT;
@@ -354,7 +355,7 @@ lookup_protocol:
354 inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0; 355 inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
355 356
356 if (SOCK_RAW == sock->type) { 357 if (SOCK_RAW == sock->type) {
357 inet->num = protocol; 358 inet->inet_num = protocol;
358 if (IPPROTO_RAW == protocol) 359 if (IPPROTO_RAW == protocol)
359 inet->hdrincl = 1; 360 inet->hdrincl = 1;
360 } 361 }
@@ -364,7 +365,7 @@ lookup_protocol:
364 else 365 else
365 inet->pmtudisc = IP_PMTUDISC_WANT; 366 inet->pmtudisc = IP_PMTUDISC_WANT;
366 367
367 inet->id = 0; 368 inet->inet_id = 0;
368 369
369 sock_init_data(sock, sk); 370 sock_init_data(sock, sk);
370 371
@@ -381,13 +382,13 @@ lookup_protocol:
381 382
382 sk_refcnt_debug_inc(sk); 383 sk_refcnt_debug_inc(sk);
383 384
384 if (inet->num) { 385 if (inet->inet_num) {
385 /* It assumes that any protocol which allows 386 /* It assumes that any protocol which allows
386 * the user to assign a number at socket 387 * the user to assign a number at socket
387 * creation time automatically 388 * creation time automatically
388 * shares. 389 * shares.
389 */ 390 */
390 inet->sport = htons(inet->num); 391 inet->inet_sport = htons(inet->inet_num);
391 /* Add to protocol hash chains. */ 392 /* Add to protocol hash chains. */
392 sk->sk_prot->hash(sk); 393 sk->sk_prot->hash(sk);
393 } 394 }
@@ -494,27 +495,27 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
494 495
495 /* Check these errors (active socket, double bind). */ 496 /* Check these errors (active socket, double bind). */
496 err = -EINVAL; 497 err = -EINVAL;
497 if (sk->sk_state != TCP_CLOSE || inet->num) 498 if (sk->sk_state != TCP_CLOSE || inet->inet_num)
498 goto out_release_sock; 499 goto out_release_sock;
499 500
500 inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr; 501 inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;
501 if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) 502 if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
502 inet->saddr = 0; /* Use device */ 503 inet->inet_saddr = 0; /* Use device */
503 504
504 /* Make sure we are allowed to bind here. */ 505 /* Make sure we are allowed to bind here. */
505 if (sk->sk_prot->get_port(sk, snum)) { 506 if (sk->sk_prot->get_port(sk, snum)) {
506 inet->saddr = inet->rcv_saddr = 0; 507 inet->inet_saddr = inet->inet_rcv_saddr = 0;
507 err = -EADDRINUSE; 508 err = -EADDRINUSE;
508 goto out_release_sock; 509 goto out_release_sock;
509 } 510 }
510 511
511 if (inet->rcv_saddr) 512 if (inet->inet_rcv_saddr)
512 sk->sk_userlocks |= SOCK_BINDADDR_LOCK; 513 sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
513 if (snum) 514 if (snum)
514 sk->sk_userlocks |= SOCK_BINDPORT_LOCK; 515 sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
515 inet->sport = htons(inet->num); 516 inet->inet_sport = htons(inet->inet_num);
516 inet->daddr = 0; 517 inet->inet_daddr = 0;
517 inet->dport = 0; 518 inet->inet_dport = 0;
518 sk_dst_reset(sk); 519 sk_dst_reset(sk);
519 err = 0; 520 err = 0;
520out_release_sock: 521out_release_sock:
@@ -532,7 +533,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
532 if (uaddr->sa_family == AF_UNSPEC) 533 if (uaddr->sa_family == AF_UNSPEC)
533 return sk->sk_prot->disconnect(sk, flags); 534 return sk->sk_prot->disconnect(sk, flags);
534 535
535 if (!inet_sk(sk)->num && inet_autobind(sk)) 536 if (!inet_sk(sk)->inet_num && inet_autobind(sk))
536 return -EAGAIN; 537 return -EAGAIN;
537 return sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len); 538 return sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len);
538} 539}
@@ -685,21 +686,21 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
685{ 686{
686 struct sock *sk = sock->sk; 687 struct sock *sk = sock->sk;
687 struct inet_sock *inet = inet_sk(sk); 688 struct inet_sock *inet = inet_sk(sk);
688 struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; 689 DECLARE_SOCKADDR(struct sockaddr_in *, sin, uaddr);
689 690
690 sin->sin_family = AF_INET; 691 sin->sin_family = AF_INET;
691 if (peer) { 692 if (peer) {
692 if (!inet->dport || 693 if (!inet->inet_dport ||
693 (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) && 694 (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
694 peer == 1)) 695 peer == 1))
695 return -ENOTCONN; 696 return -ENOTCONN;
696 sin->sin_port = inet->dport; 697 sin->sin_port = inet->inet_dport;
697 sin->sin_addr.s_addr = inet->daddr; 698 sin->sin_addr.s_addr = inet->inet_daddr;
698 } else { 699 } else {
699 __be32 addr = inet->rcv_saddr; 700 __be32 addr = inet->inet_rcv_saddr;
700 if (!addr) 701 if (!addr)
701 addr = inet->saddr; 702 addr = inet->inet_saddr;
702 sin->sin_port = inet->sport; 703 sin->sin_port = inet->inet_sport;
703 sin->sin_addr.s_addr = addr; 704 sin->sin_addr.s_addr = addr;
704 } 705 }
705 memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); 706 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
@@ -714,7 +715,7 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
714 struct sock *sk = sock->sk; 715 struct sock *sk = sock->sk;
715 716
716 /* We may need to bind the socket. */ 717 /* We may need to bind the socket. */
717 if (!inet_sk(sk)->num && inet_autobind(sk)) 718 if (!inet_sk(sk)->inet_num && inet_autobind(sk))
718 return -EAGAIN; 719 return -EAGAIN;
719 720
720 return sk->sk_prot->sendmsg(iocb, sk, msg, size); 721 return sk->sk_prot->sendmsg(iocb, sk, msg, size);
@@ -728,7 +729,7 @@ static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
728 struct sock *sk = sock->sk; 729 struct sock *sk = sock->sk;
729 730
730 /* We may need to bind the socket. */ 731 /* We may need to bind the socket. */
731 if (!inet_sk(sk)->num && inet_autobind(sk)) 732 if (!inet_sk(sk)->inet_num && inet_autobind(sk))
732 return -EAGAIN; 733 return -EAGAIN;
733 734
734 if (sk->sk_prot->sendpage) 735 if (sk->sk_prot->sendpage)
@@ -931,7 +932,7 @@ static const struct proto_ops inet_sockraw_ops = {
931#endif 932#endif
932}; 933};
933 934
934static struct net_proto_family inet_family_ops = { 935static const struct net_proto_family inet_family_ops = {
935 .family = PF_INET, 936 .family = PF_INET,
936 .create = inet_create, 937 .create = inet_create,
937 .owner = THIS_MODULE, 938 .owner = THIS_MODULE,
@@ -947,7 +948,6 @@ static struct inet_protosw inetsw_array[] =
947 .protocol = IPPROTO_TCP, 948 .protocol = IPPROTO_TCP,
948 .prot = &tcp_prot, 949 .prot = &tcp_prot,
949 .ops = &inet_stream_ops, 950 .ops = &inet_stream_ops,
950 .capability = -1,
951 .no_check = 0, 951 .no_check = 0,
952 .flags = INET_PROTOSW_PERMANENT | 952 .flags = INET_PROTOSW_PERMANENT |
953 INET_PROTOSW_ICSK, 953 INET_PROTOSW_ICSK,
@@ -958,7 +958,6 @@ static struct inet_protosw inetsw_array[] =
958 .protocol = IPPROTO_UDP, 958 .protocol = IPPROTO_UDP,
959 .prot = &udp_prot, 959 .prot = &udp_prot,
960 .ops = &inet_dgram_ops, 960 .ops = &inet_dgram_ops,
961 .capability = -1,
962 .no_check = UDP_CSUM_DEFAULT, 961 .no_check = UDP_CSUM_DEFAULT,
963 .flags = INET_PROTOSW_PERMANENT, 962 .flags = INET_PROTOSW_PERMANENT,
964 }, 963 },
@@ -969,7 +968,6 @@ static struct inet_protosw inetsw_array[] =
969 .protocol = IPPROTO_IP, /* wild card */ 968 .protocol = IPPROTO_IP, /* wild card */
970 .prot = &raw_prot, 969 .prot = &raw_prot,
971 .ops = &inet_sockraw_ops, 970 .ops = &inet_sockraw_ops,
972 .capability = CAP_NET_RAW,
973 .no_check = UDP_CSUM_DEFAULT, 971 .no_check = UDP_CSUM_DEFAULT,
974 .flags = INET_PROTOSW_REUSE, 972 .flags = INET_PROTOSW_REUSE,
975 } 973 }
@@ -1059,9 +1057,9 @@ static int inet_sk_reselect_saddr(struct sock *sk)
1059 struct inet_sock *inet = inet_sk(sk); 1057 struct inet_sock *inet = inet_sk(sk);
1060 int err; 1058 int err;
1061 struct rtable *rt; 1059 struct rtable *rt;
1062 __be32 old_saddr = inet->saddr; 1060 __be32 old_saddr = inet->inet_saddr;
1063 __be32 new_saddr; 1061 __be32 new_saddr;
1064 __be32 daddr = inet->daddr; 1062 __be32 daddr = inet->inet_daddr;
1065 1063
1066 if (inet->opt && inet->opt->srr) 1064 if (inet->opt && inet->opt->srr)
1067 daddr = inet->opt->faddr; 1065 daddr = inet->opt->faddr;
@@ -1071,7 +1069,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
1071 RT_CONN_FLAGS(sk), 1069 RT_CONN_FLAGS(sk),
1072 sk->sk_bound_dev_if, 1070 sk->sk_bound_dev_if,
1073 sk->sk_protocol, 1071 sk->sk_protocol,
1074 inet->sport, inet->dport, sk, 0); 1072 inet->inet_sport, inet->inet_dport, sk, 0);
1075 if (err) 1073 if (err)
1076 return err; 1074 return err;
1077 1075
@@ -1087,7 +1085,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
1087 __func__, &old_saddr, &new_saddr); 1085 __func__, &old_saddr, &new_saddr);
1088 } 1086 }
1089 1087
1090 inet->saddr = inet->rcv_saddr = new_saddr; 1088 inet->inet_saddr = inet->inet_rcv_saddr = new_saddr;
1091 1089
1092 /* 1090 /*
1093 * XXX The only one ugly spot where we need to 1091 * XXX The only one ugly spot where we need to
@@ -1113,7 +1111,7 @@ int inet_sk_rebuild_header(struct sock *sk)
1113 return 0; 1111 return 0;
1114 1112
1115 /* Reroute. */ 1113 /* Reroute. */
1116 daddr = inet->daddr; 1114 daddr = inet->inet_daddr;
1117 if (inet->opt && inet->opt->srr) 1115 if (inet->opt && inet->opt->srr)
1118 daddr = inet->opt->faddr; 1116 daddr = inet->opt->faddr;
1119{ 1117{
@@ -1123,7 +1121,7 @@ int inet_sk_rebuild_header(struct sock *sk)
1123 .nl_u = { 1121 .nl_u = {
1124 .ip4_u = { 1122 .ip4_u = {
1125 .daddr = daddr, 1123 .daddr = daddr,
1126 .saddr = inet->saddr, 1124 .saddr = inet->inet_saddr,
1127 .tos = RT_CONN_FLAGS(sk), 1125 .tos = RT_CONN_FLAGS(sk),
1128 }, 1126 },
1129 }, 1127 },
@@ -1131,8 +1129,8 @@ int inet_sk_rebuild_header(struct sock *sk)
1131 .flags = inet_sk_flowi_flags(sk), 1129 .flags = inet_sk_flowi_flags(sk),
1132 .uli_u = { 1130 .uli_u = {
1133 .ports = { 1131 .ports = {
1134 .sport = inet->sport, 1132 .sport = inet->inet_sport,
1135 .dport = inet->dport, 1133 .dport = inet->inet_dport,
1136 }, 1134 },
1137 }, 1135 },
1138 }; 1136 };
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 5c662703eb1e..d07b0c1dd350 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -1,3 +1,4 @@
1#include <crypto/hash.h>
1#include <linux/err.h> 2#include <linux/err.h>
2#include <linux/module.h> 3#include <linux/module.h>
3#include <net/ip.h> 4#include <net/ip.h>
@@ -5,10 +6,67 @@
5#include <net/ah.h> 6#include <net/ah.h>
6#include <linux/crypto.h> 7#include <linux/crypto.h>
7#include <linux/pfkeyv2.h> 8#include <linux/pfkeyv2.h>
8#include <linux/spinlock.h> 9#include <linux/scatterlist.h>
9#include <net/icmp.h> 10#include <net/icmp.h>
10#include <net/protocol.h> 11#include <net/protocol.h>
11 12
13struct ah_skb_cb {
14 struct xfrm_skb_cb xfrm;
15 void *tmp;
16};
17
18#define AH_SKB_CB(__skb) ((struct ah_skb_cb *)&((__skb)->cb[0]))
19
20static void *ah_alloc_tmp(struct crypto_ahash *ahash, int nfrags,
21 unsigned int size)
22{
23 unsigned int len;
24
25 len = size + crypto_ahash_digestsize(ahash) +
26 (crypto_ahash_alignmask(ahash) &
27 ~(crypto_tfm_ctx_alignment() - 1));
28
29 len = ALIGN(len, crypto_tfm_ctx_alignment());
30
31 len += sizeof(struct ahash_request) + crypto_ahash_reqsize(ahash);
32 len = ALIGN(len, __alignof__(struct scatterlist));
33
34 len += sizeof(struct scatterlist) * nfrags;
35
36 return kmalloc(len, GFP_ATOMIC);
37}
38
39static inline u8 *ah_tmp_auth(void *tmp, unsigned int offset)
40{
41 return tmp + offset;
42}
43
44static inline u8 *ah_tmp_icv(struct crypto_ahash *ahash, void *tmp,
45 unsigned int offset)
46{
47 return PTR_ALIGN((u8 *)tmp + offset, crypto_ahash_alignmask(ahash) + 1);
48}
49
50static inline struct ahash_request *ah_tmp_req(struct crypto_ahash *ahash,
51 u8 *icv)
52{
53 struct ahash_request *req;
54
55 req = (void *)PTR_ALIGN(icv + crypto_ahash_digestsize(ahash),
56 crypto_tfm_ctx_alignment());
57
58 ahash_request_set_tfm(req, ahash);
59
60 return req;
61}
62
63static inline struct scatterlist *ah_req_sg(struct crypto_ahash *ahash,
64 struct ahash_request *req)
65{
66 return (void *)ALIGN((unsigned long)(req + 1) +
67 crypto_ahash_reqsize(ahash),
68 __alignof__(struct scatterlist));
69}
12 70
13/* Clear mutable options and find final destination to substitute 71/* Clear mutable options and find final destination to substitute
14 * into IP header for icv calculation. Options are already checked 72 * into IP header for icv calculation. Options are already checked
@@ -54,20 +112,72 @@ static int ip_clear_mutable_options(struct iphdr *iph, __be32 *daddr)
54 return 0; 112 return 0;
55} 113}
56 114
115static void ah_output_done(struct crypto_async_request *base, int err)
116{
117 u8 *icv;
118 struct iphdr *iph;
119 struct sk_buff *skb = base->data;
120 struct xfrm_state *x = skb_dst(skb)->xfrm;
121 struct ah_data *ahp = x->data;
122 struct iphdr *top_iph = ip_hdr(skb);
123 struct ip_auth_hdr *ah = ip_auth_hdr(skb);
124 int ihl = ip_hdrlen(skb);
125
126 iph = AH_SKB_CB(skb)->tmp;
127 icv = ah_tmp_icv(ahp->ahash, iph, ihl);
128 memcpy(ah->auth_data, icv, ahp->icv_trunc_len);
129
130 top_iph->tos = iph->tos;
131 top_iph->ttl = iph->ttl;
132 top_iph->frag_off = iph->frag_off;
133 if (top_iph->ihl != 5) {
134 top_iph->daddr = iph->daddr;
135 memcpy(top_iph+1, iph+1, top_iph->ihl*4 - sizeof(struct iphdr));
136 }
137
138 err = ah->nexthdr;
139
140 kfree(AH_SKB_CB(skb)->tmp);
141 xfrm_output_resume(skb, err);
142}
143
57static int ah_output(struct xfrm_state *x, struct sk_buff *skb) 144static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
58{ 145{
59 int err; 146 int err;
147 int nfrags;
148 int ihl;
149 u8 *icv;
150 struct sk_buff *trailer;
151 struct crypto_ahash *ahash;
152 struct ahash_request *req;
153 struct scatterlist *sg;
60 struct iphdr *iph, *top_iph; 154 struct iphdr *iph, *top_iph;
61 struct ip_auth_hdr *ah; 155 struct ip_auth_hdr *ah;
62 struct ah_data *ahp; 156 struct ah_data *ahp;
63 union { 157
64 struct iphdr iph; 158 ahp = x->data;
65 char buf[60]; 159 ahash = ahp->ahash;
66 } tmp_iph; 160
161 if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
162 goto out;
163 nfrags = err;
67 164
68 skb_push(skb, -skb_network_offset(skb)); 165 skb_push(skb, -skb_network_offset(skb));
166 ah = ip_auth_hdr(skb);
167 ihl = ip_hdrlen(skb);
168
169 err = -ENOMEM;
170 iph = ah_alloc_tmp(ahash, nfrags, ihl);
171 if (!iph)
172 goto out;
173
174 icv = ah_tmp_icv(ahash, iph, ihl);
175 req = ah_tmp_req(ahash, icv);
176 sg = ah_req_sg(ahash, req);
177
178 memset(ah->auth_data, 0, ahp->icv_trunc_len);
179
69 top_iph = ip_hdr(skb); 180 top_iph = ip_hdr(skb);
70 iph = &tmp_iph.iph;
71 181
72 iph->tos = top_iph->tos; 182 iph->tos = top_iph->tos;
73 iph->ttl = top_iph->ttl; 183 iph->ttl = top_iph->ttl;
@@ -78,10 +188,9 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
78 memcpy(iph+1, top_iph+1, top_iph->ihl*4 - sizeof(struct iphdr)); 188 memcpy(iph+1, top_iph+1, top_iph->ihl*4 - sizeof(struct iphdr));
79 err = ip_clear_mutable_options(top_iph, &top_iph->daddr); 189 err = ip_clear_mutable_options(top_iph, &top_iph->daddr);
80 if (err) 190 if (err)
81 goto error; 191 goto out_free;
82 } 192 }
83 193
84 ah = ip_auth_hdr(skb);
85 ah->nexthdr = *skb_mac_header(skb); 194 ah->nexthdr = *skb_mac_header(skb);
86 *skb_mac_header(skb) = IPPROTO_AH; 195 *skb_mac_header(skb) = IPPROTO_AH;
87 196
@@ -91,20 +200,31 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
91 top_iph->ttl = 0; 200 top_iph->ttl = 0;
92 top_iph->check = 0; 201 top_iph->check = 0;
93 202
94 ahp = x->data;
95 ah->hdrlen = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2; 203 ah->hdrlen = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2;
96 204
97 ah->reserved = 0; 205 ah->reserved = 0;
98 ah->spi = x->id.spi; 206 ah->spi = x->id.spi;
99 ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output); 207 ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output);
100 208
101 spin_lock_bh(&x->lock); 209 sg_init_table(sg, nfrags);
102 err = ah_mac_digest(ahp, skb, ah->auth_data); 210 skb_to_sgvec(skb, sg, 0, skb->len);
103 memcpy(ah->auth_data, ahp->work_icv, ahp->icv_trunc_len);
104 spin_unlock_bh(&x->lock);
105 211
106 if (err) 212 ahash_request_set_crypt(req, sg, icv, skb->len);
107 goto error; 213 ahash_request_set_callback(req, 0, ah_output_done, skb);
214
215 AH_SKB_CB(skb)->tmp = iph;
216
217 err = crypto_ahash_digest(req);
218 if (err) {
219 if (err == -EINPROGRESS)
220 goto out;
221
222 if (err == -EBUSY)
223 err = NET_XMIT_DROP;
224 goto out_free;
225 }
226
227 memcpy(ah->auth_data, icv, ahp->icv_trunc_len);
108 228
109 top_iph->tos = iph->tos; 229 top_iph->tos = iph->tos;
110 top_iph->ttl = iph->ttl; 230 top_iph->ttl = iph->ttl;
@@ -114,28 +234,67 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
114 memcpy(top_iph+1, iph+1, top_iph->ihl*4 - sizeof(struct iphdr)); 234 memcpy(top_iph+1, iph+1, top_iph->ihl*4 - sizeof(struct iphdr));
115 } 235 }
116 236
117 err = 0; 237out_free:
118 238 kfree(iph);
119error: 239out:
120 return err; 240 return err;
121} 241}
122 242
243static void ah_input_done(struct crypto_async_request *base, int err)
244{
245 u8 *auth_data;
246 u8 *icv;
247 struct iphdr *work_iph;
248 struct sk_buff *skb = base->data;
249 struct xfrm_state *x = xfrm_input_state(skb);
250 struct ah_data *ahp = x->data;
251 struct ip_auth_hdr *ah = ip_auth_hdr(skb);
252 int ihl = ip_hdrlen(skb);
253 int ah_hlen = (ah->hdrlen + 2) << 2;
254
255 work_iph = AH_SKB_CB(skb)->tmp;
256 auth_data = ah_tmp_auth(work_iph, ihl);
257 icv = ah_tmp_icv(ahp->ahash, auth_data, ahp->icv_trunc_len);
258
259 err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG: 0;
260 if (err)
261 goto out;
262
263 skb->network_header += ah_hlen;
264 memcpy(skb_network_header(skb), work_iph, ihl);
265 __skb_pull(skb, ah_hlen + ihl);
266 skb_set_transport_header(skb, -ihl);
267
268 err = ah->nexthdr;
269out:
270 kfree(AH_SKB_CB(skb)->tmp);
271 xfrm_input_resume(skb, err);
272}
273
123static int ah_input(struct xfrm_state *x, struct sk_buff *skb) 274static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
124{ 275{
125 int ah_hlen; 276 int ah_hlen;
126 int ihl; 277 int ihl;
127 int nexthdr; 278 int nexthdr;
128 int err = -EINVAL; 279 int nfrags;
129 struct iphdr *iph; 280 u8 *auth_data;
281 u8 *icv;
282 struct sk_buff *trailer;
283 struct crypto_ahash *ahash;
284 struct ahash_request *req;
285 struct scatterlist *sg;
286 struct iphdr *iph, *work_iph;
130 struct ip_auth_hdr *ah; 287 struct ip_auth_hdr *ah;
131 struct ah_data *ahp; 288 struct ah_data *ahp;
132 char work_buf[60]; 289 int err = -ENOMEM;
133 290
134 if (!pskb_may_pull(skb, sizeof(*ah))) 291 if (!pskb_may_pull(skb, sizeof(*ah)))
135 goto out; 292 goto out;
136 293
137 ah = (struct ip_auth_hdr *)skb->data; 294 ah = (struct ip_auth_hdr *)skb->data;
138 ahp = x->data; 295 ahp = x->data;
296 ahash = ahp->ahash;
297
139 nexthdr = ah->nexthdr; 298 nexthdr = ah->nexthdr;
140 ah_hlen = (ah->hdrlen + 2) << 2; 299 ah_hlen = (ah->hdrlen + 2) << 2;
141 300
@@ -156,9 +315,24 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
156 315
157 ah = (struct ip_auth_hdr *)skb->data; 316 ah = (struct ip_auth_hdr *)skb->data;
158 iph = ip_hdr(skb); 317 iph = ip_hdr(skb);
318 ihl = ip_hdrlen(skb);
319
320 if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
321 goto out;
322 nfrags = err;
323
324 work_iph = ah_alloc_tmp(ahash, nfrags, ihl + ahp->icv_trunc_len);
325 if (!work_iph)
326 goto out;
327
328 auth_data = ah_tmp_auth(work_iph, ihl);
329 icv = ah_tmp_icv(ahash, auth_data, ahp->icv_trunc_len);
330 req = ah_tmp_req(ahash, icv);
331 sg = ah_req_sg(ahash, req);
159 332
160 ihl = skb->data - skb_network_header(skb); 333 memcpy(work_iph, iph, ihl);
161 memcpy(work_buf, iph, ihl); 334 memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
335 memset(ah->auth_data, 0, ahp->icv_trunc_len);
162 336
163 iph->ttl = 0; 337 iph->ttl = 0;
164 iph->tos = 0; 338 iph->tos = 0;
@@ -166,35 +340,44 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
166 iph->check = 0; 340 iph->check = 0;
167 if (ihl > sizeof(*iph)) { 341 if (ihl > sizeof(*iph)) {
168 __be32 dummy; 342 __be32 dummy;
169 if (ip_clear_mutable_options(iph, &dummy)) 343 err = ip_clear_mutable_options(iph, &dummy);
170 goto out; 344 if (err)
345 goto out_free;
171 } 346 }
172 347
173 spin_lock(&x->lock); 348 skb_push(skb, ihl);
174 {
175 u8 auth_data[MAX_AH_AUTH_LEN];
176 349
177 memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len); 350 sg_init_table(sg, nfrags);
178 skb_push(skb, ihl); 351 skb_to_sgvec(skb, sg, 0, skb->len);
179 err = ah_mac_digest(ahp, skb, ah->auth_data); 352
180 if (err) 353 ahash_request_set_crypt(req, sg, icv, skb->len);
181 goto unlock; 354 ahash_request_set_callback(req, 0, ah_input_done, skb);
182 if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) 355
183 err = -EBADMSG; 356 AH_SKB_CB(skb)->tmp = work_iph;
357
358 err = crypto_ahash_digest(req);
359 if (err) {
360 if (err == -EINPROGRESS)
361 goto out;
362
363 if (err == -EBUSY)
364 err = NET_XMIT_DROP;
365 goto out_free;
184 } 366 }
185unlock:
186 spin_unlock(&x->lock);
187 367
368 err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG: 0;
188 if (err) 369 if (err)
189 goto out; 370 goto out_free;
190 371
191 skb->network_header += ah_hlen; 372 skb->network_header += ah_hlen;
192 memcpy(skb_network_header(skb), work_buf, ihl); 373 memcpy(skb_network_header(skb), work_iph, ihl);
193 skb->transport_header = skb->network_header;
194 __skb_pull(skb, ah_hlen + ihl); 374 __skb_pull(skb, ah_hlen + ihl);
375 skb_set_transport_header(skb, -ihl);
195 376
196 return nexthdr; 377 err = nexthdr;
197 378
379out_free:
380 kfree (work_iph);
198out: 381out:
199 return err; 382 return err;
200} 383}
@@ -222,7 +405,7 @@ static int ah_init_state(struct xfrm_state *x)
222{ 405{
223 struct ah_data *ahp = NULL; 406 struct ah_data *ahp = NULL;
224 struct xfrm_algo_desc *aalg_desc; 407 struct xfrm_algo_desc *aalg_desc;
225 struct crypto_hash *tfm; 408 struct crypto_ahash *ahash;
226 409
227 if (!x->aalg) 410 if (!x->aalg)
228 goto error; 411 goto error;
@@ -231,31 +414,31 @@ static int ah_init_state(struct xfrm_state *x)
231 goto error; 414 goto error;
232 415
233 ahp = kzalloc(sizeof(*ahp), GFP_KERNEL); 416 ahp = kzalloc(sizeof(*ahp), GFP_KERNEL);
234 if (ahp == NULL) 417 if (!ahp)
235 return -ENOMEM; 418 return -ENOMEM;
236 419
237 tfm = crypto_alloc_hash(x->aalg->alg_name, 0, CRYPTO_ALG_ASYNC); 420 ahash = crypto_alloc_ahash(x->aalg->alg_name, 0, 0);
238 if (IS_ERR(tfm)) 421 if (IS_ERR(ahash))
239 goto error; 422 goto error;
240 423
241 ahp->tfm = tfm; 424 ahp->ahash = ahash;
242 if (crypto_hash_setkey(tfm, x->aalg->alg_key, 425 if (crypto_ahash_setkey(ahash, x->aalg->alg_key,
243 (x->aalg->alg_key_len + 7) / 8)) 426 (x->aalg->alg_key_len + 7) / 8))
244 goto error; 427 goto error;
245 428
246 /* 429 /*
247 * Lookup the algorithm description maintained by xfrm_algo, 430 * Lookup the algorithm description maintained by xfrm_algo,
248 * verify crypto transform properties, and store information 431 * verify crypto transform properties, and store information
249 * we need for AH processing. This lookup cannot fail here 432 * we need for AH processing. This lookup cannot fail here
250 * after a successful crypto_alloc_hash(). 433 * after a successful crypto_alloc_ahash().
251 */ 434 */
252 aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0); 435 aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
253 BUG_ON(!aalg_desc); 436 BUG_ON(!aalg_desc);
254 437
255 if (aalg_desc->uinfo.auth.icv_fullbits/8 != 438 if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
256 crypto_hash_digestsize(tfm)) { 439 crypto_ahash_digestsize(ahash)) {
257 printk(KERN_INFO "AH: %s digestsize %u != %hu\n", 440 printk(KERN_INFO "AH: %s digestsize %u != %hu\n",
258 x->aalg->alg_name, crypto_hash_digestsize(tfm), 441 x->aalg->alg_name, crypto_ahash_digestsize(ahash),
259 aalg_desc->uinfo.auth.icv_fullbits/8); 442 aalg_desc->uinfo.auth.icv_fullbits/8);
260 goto error; 443 goto error;
261 } 444 }
@@ -265,10 +448,6 @@ static int ah_init_state(struct xfrm_state *x)
265 448
266 BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN); 449 BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN);
267 450
268 ahp->work_icv = kmalloc(ahp->icv_full_len, GFP_KERNEL);
269 if (!ahp->work_icv)
270 goto error;
271
272 x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + 451 x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) +
273 ahp->icv_trunc_len); 452 ahp->icv_trunc_len);
274 if (x->props.mode == XFRM_MODE_TUNNEL) 453 if (x->props.mode == XFRM_MODE_TUNNEL)
@@ -279,8 +458,7 @@ static int ah_init_state(struct xfrm_state *x)
279 458
280error: 459error:
281 if (ahp) { 460 if (ahp) {
282 kfree(ahp->work_icv); 461 crypto_free_ahash(ahp->ahash);
283 crypto_free_hash(ahp->tfm);
284 kfree(ahp); 462 kfree(ahp);
285 } 463 }
286 return -EINVAL; 464 return -EINVAL;
@@ -293,8 +471,7 @@ static void ah_destroy(struct xfrm_state *x)
293 if (!ahp) 471 if (!ahp)
294 return; 472 return;
295 473
296 kfree(ahp->work_icv); 474 crypto_free_ahash(ahp->ahash);
297 crypto_free_hash(ahp->tfm);
298 kfree(ahp); 475 kfree(ahp);
299} 476}
300 477
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 039cc1ffe977..1e029dc75455 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -2017,7 +2017,7 @@ req_setattr_failure:
2017 * values on failure. 2017 * values on failure.
2018 * 2018 *
2019 */ 2019 */
2020int cipso_v4_delopt(struct ip_options **opt_ptr) 2020static int cipso_v4_delopt(struct ip_options **opt_ptr)
2021{ 2021{
2022 int hdr_delta = 0; 2022 int hdr_delta = 0;
2023 struct ip_options *opt = *opt_ptr; 2023 struct ip_options *opt = *opt_ptr;
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 5e6c5a0f3fde..fb2465811b48 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -39,7 +39,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
39 sk_dst_reset(sk); 39 sk_dst_reset(sk);
40 40
41 oif = sk->sk_bound_dev_if; 41 oif = sk->sk_bound_dev_if;
42 saddr = inet->saddr; 42 saddr = inet->inet_saddr;
43 if (ipv4_is_multicast(usin->sin_addr.s_addr)) { 43 if (ipv4_is_multicast(usin->sin_addr.s_addr)) {
44 if (!oif) 44 if (!oif)
45 oif = inet->mc_index; 45 oif = inet->mc_index;
@@ -49,7 +49,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
49 err = ip_route_connect(&rt, usin->sin_addr.s_addr, saddr, 49 err = ip_route_connect(&rt, usin->sin_addr.s_addr, saddr,
50 RT_CONN_FLAGS(sk), oif, 50 RT_CONN_FLAGS(sk), oif,
51 sk->sk_protocol, 51 sk->sk_protocol,
52 inet->sport, usin->sin_port, sk, 1); 52 inet->inet_sport, usin->sin_port, sk, 1);
53 if (err) { 53 if (err) {
54 if (err == -ENETUNREACH) 54 if (err == -ENETUNREACH)
55 IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); 55 IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
@@ -60,14 +60,14 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
60 ip_rt_put(rt); 60 ip_rt_put(rt);
61 return -EACCES; 61 return -EACCES;
62 } 62 }
63 if (!inet->saddr) 63 if (!inet->inet_saddr)
64 inet->saddr = rt->rt_src; /* Update source address */ 64 inet->inet_saddr = rt->rt_src; /* Update source address */
65 if (!inet->rcv_saddr) 65 if (!inet->inet_rcv_saddr)
66 inet->rcv_saddr = rt->rt_src; 66 inet->inet_rcv_saddr = rt->rt_src;
67 inet->daddr = rt->rt_dst; 67 inet->inet_daddr = rt->rt_dst;
68 inet->dport = usin->sin_port; 68 inet->inet_dport = usin->sin_port;
69 sk->sk_state = TCP_ESTABLISHED; 69 sk->sk_state = TCP_ESTABLISHED;
70 inet->id = jiffies; 70 inet->inet_id = jiffies;
71 71
72 sk_dst_set(sk, &rt->u.dst); 72 sk_dst_set(sk, &rt->u.dst);
73 return(0); 73 return(0);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 5df2f6a0b0f0..7620382058a0 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -140,11 +140,11 @@ void in_dev_finish_destroy(struct in_device *idev)
140#endif 140#endif
141 dev_put(dev); 141 dev_put(dev);
142 if (!idev->dead) 142 if (!idev->dead)
143 printk("Freeing alive in_device %p\n", idev); 143 pr_err("Freeing alive in_device %p\n", idev);
144 else { 144 else
145 kfree(idev); 145 kfree(idev);
146 }
147} 146}
147EXPORT_SYMBOL(in_dev_finish_destroy);
148 148
149static struct in_device *inetdev_init(struct net_device *dev) 149static struct in_device *inetdev_init(struct net_device *dev)
150{ 150{
@@ -159,7 +159,8 @@ static struct in_device *inetdev_init(struct net_device *dev)
159 sizeof(in_dev->cnf)); 159 sizeof(in_dev->cnf));
160 in_dev->cnf.sysctl = NULL; 160 in_dev->cnf.sysctl = NULL;
161 in_dev->dev = dev; 161 in_dev->dev = dev;
162 if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL) 162 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
163 if (!in_dev->arp_parms)
163 goto out_kfree; 164 goto out_kfree;
164 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING)) 165 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
165 dev_disable_lro(dev); 166 dev_disable_lro(dev);
@@ -405,13 +406,15 @@ struct in_device *inetdev_by_index(struct net *net, int ifindex)
405{ 406{
406 struct net_device *dev; 407 struct net_device *dev;
407 struct in_device *in_dev = NULL; 408 struct in_device *in_dev = NULL;
408 read_lock(&dev_base_lock); 409
409 dev = __dev_get_by_index(net, ifindex); 410 rcu_read_lock();
411 dev = dev_get_by_index_rcu(net, ifindex);
410 if (dev) 412 if (dev)
411 in_dev = in_dev_get(dev); 413 in_dev = in_dev_get(dev);
412 read_unlock(&dev_base_lock); 414 rcu_read_unlock();
413 return in_dev; 415 return in_dev;
414} 416}
417EXPORT_SYMBOL(inetdev_by_index);
415 418
416/* Called only from RTNL semaphored context. No locks. */ 419/* Called only from RTNL semaphored context. No locks. */
417 420
@@ -557,7 +560,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
557 * Determine a default network mask, based on the IP address. 560 * Determine a default network mask, based on the IP address.
558 */ 561 */
559 562
560static __inline__ int inet_abc_len(__be32 addr) 563static inline int inet_abc_len(__be32 addr)
561{ 564{
562 int rc = -1; /* Something else, probably a multicast. */ 565 int rc = -1; /* Something else, probably a multicast. */
563 566
@@ -646,13 +649,15 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
646 rtnl_lock(); 649 rtnl_lock();
647 650
648 ret = -ENODEV; 651 ret = -ENODEV;
649 if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL) 652 dev = __dev_get_by_name(net, ifr.ifr_name);
653 if (!dev)
650 goto done; 654 goto done;
651 655
652 if (colon) 656 if (colon)
653 *colon = ':'; 657 *colon = ':';
654 658
655 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) { 659 in_dev = __in_dev_get_rtnl(dev);
660 if (in_dev) {
656 if (tryaddrmatch) { 661 if (tryaddrmatch) {
657 /* Matthias Andree */ 662 /* Matthias Andree */
658 /* compare label and address (4.4BSD style) */ 663 /* compare label and address (4.4BSD style) */
@@ -720,7 +725,8 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
720 725
721 if (!ifa) { 726 if (!ifa) {
722 ret = -ENOBUFS; 727 ret = -ENOBUFS;
723 if ((ifa = inet_alloc_ifa()) == NULL) 728 ifa = inet_alloc_ifa();
729 if (!ifa)
724 break; 730 break;
725 if (colon) 731 if (colon)
726 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ); 732 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
@@ -822,10 +828,10 @@ static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
822 struct ifreq ifr; 828 struct ifreq ifr;
823 int done = 0; 829 int done = 0;
824 830
825 if (!in_dev || (ifa = in_dev->ifa_list) == NULL) 831 if (!in_dev)
826 goto out; 832 goto out;
827 833
828 for (; ifa; ifa = ifa->ifa_next) { 834 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
829 if (!buf) { 835 if (!buf) {
830 done += sizeof(ifr); 836 done += sizeof(ifr);
831 continue; 837 continue;
@@ -875,36 +881,33 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
875 if (!addr) 881 if (!addr)
876 addr = ifa->ifa_local; 882 addr = ifa->ifa_local;
877 } endfor_ifa(in_dev); 883 } endfor_ifa(in_dev);
878no_in_dev:
879 rcu_read_unlock();
880 884
881 if (addr) 885 if (addr)
882 goto out; 886 goto out_unlock;
887no_in_dev:
883 888
884 /* Not loopback addresses on loopback should be preferred 889 /* Not loopback addresses on loopback should be preferred
885 in this case. It is importnat that lo is the first interface 890 in this case. It is importnat that lo is the first interface
886 in dev_base list. 891 in dev_base list.
887 */ 892 */
888 read_lock(&dev_base_lock); 893 for_each_netdev_rcu(net, dev) {
889 rcu_read_lock(); 894 in_dev = __in_dev_get_rcu(dev);
890 for_each_netdev(net, dev) { 895 if (!in_dev)
891 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
892 continue; 896 continue;
893 897
894 for_primary_ifa(in_dev) { 898 for_primary_ifa(in_dev) {
895 if (ifa->ifa_scope != RT_SCOPE_LINK && 899 if (ifa->ifa_scope != RT_SCOPE_LINK &&
896 ifa->ifa_scope <= scope) { 900 ifa->ifa_scope <= scope) {
897 addr = ifa->ifa_local; 901 addr = ifa->ifa_local;
898 goto out_unlock_both; 902 goto out_unlock;
899 } 903 }
900 } endfor_ifa(in_dev); 904 } endfor_ifa(in_dev);
901 } 905 }
902out_unlock_both: 906out_unlock:
903 read_unlock(&dev_base_lock);
904 rcu_read_unlock(); 907 rcu_read_unlock();
905out:
906 return addr; 908 return addr;
907} 909}
910EXPORT_SYMBOL(inet_select_addr);
908 911
909static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst, 912static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
910 __be32 local, int scope) 913 __be32 local, int scope)
@@ -940,7 +943,7 @@ static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
940 } 943 }
941 } endfor_ifa(in_dev); 944 } endfor_ifa(in_dev);
942 945
943 return same? addr : 0; 946 return same ? addr : 0;
944} 947}
945 948
946/* 949/*
@@ -961,17 +964,16 @@ __be32 inet_confirm_addr(struct in_device *in_dev,
961 return confirm_addr_indev(in_dev, dst, local, scope); 964 return confirm_addr_indev(in_dev, dst, local, scope);
962 965
963 net = dev_net(in_dev->dev); 966 net = dev_net(in_dev->dev);
964 read_lock(&dev_base_lock);
965 rcu_read_lock(); 967 rcu_read_lock();
966 for_each_netdev(net, dev) { 968 for_each_netdev_rcu(net, dev) {
967 if ((in_dev = __in_dev_get_rcu(dev))) { 969 in_dev = __in_dev_get_rcu(dev);
970 if (in_dev) {
968 addr = confirm_addr_indev(in_dev, dst, local, scope); 971 addr = confirm_addr_indev(in_dev, dst, local, scope);
969 if (addr) 972 if (addr)
970 break; 973 break;
971 } 974 }
972 } 975 }
973 rcu_read_unlock(); 976 rcu_read_unlock();
974 read_unlock(&dev_base_lock);
975 977
976 return addr; 978 return addr;
977} 979}
@@ -984,14 +986,16 @@ int register_inetaddr_notifier(struct notifier_block *nb)
984{ 986{
985 return blocking_notifier_chain_register(&inetaddr_chain, nb); 987 return blocking_notifier_chain_register(&inetaddr_chain, nb);
986} 988}
989EXPORT_SYMBOL(register_inetaddr_notifier);
987 990
988int unregister_inetaddr_notifier(struct notifier_block *nb) 991int unregister_inetaddr_notifier(struct notifier_block *nb)
989{ 992{
990 return blocking_notifier_chain_unregister(&inetaddr_chain, nb); 993 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
991} 994}
995EXPORT_SYMBOL(unregister_inetaddr_notifier);
992 996
993/* Rename ifa_labels for a device name change. Make some effort to preserve existing 997/* Rename ifa_labels for a device name change. Make some effort to preserve
994 * alias numbering and to create unique labels if possible. 998 * existing alias numbering and to create unique labels if possible.
995*/ 999*/
996static void inetdev_changename(struct net_device *dev, struct in_device *in_dev) 1000static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
997{ 1001{
@@ -1010,11 +1014,10 @@ static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1010 sprintf(old, ":%d", named); 1014 sprintf(old, ":%d", named);
1011 dot = old; 1015 dot = old;
1012 } 1016 }
1013 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) { 1017 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1014 strcat(ifa->ifa_label, dot); 1018 strcat(ifa->ifa_label, dot);
1015 } else { 1019 else
1016 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot); 1020 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1017 }
1018skip: 1021skip:
1019 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); 1022 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1020 } 1023 }
@@ -1061,8 +1064,9 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
1061 if (!inetdev_valid_mtu(dev->mtu)) 1064 if (!inetdev_valid_mtu(dev->mtu))
1062 break; 1065 break;
1063 if (dev->flags & IFF_LOOPBACK) { 1066 if (dev->flags & IFF_LOOPBACK) {
1064 struct in_ifaddr *ifa; 1067 struct in_ifaddr *ifa = inet_alloc_ifa();
1065 if ((ifa = inet_alloc_ifa()) != NULL) { 1068
1069 if (ifa) {
1066 ifa->ifa_local = 1070 ifa->ifa_local =
1067 ifa->ifa_address = htonl(INADDR_LOOPBACK); 1071 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1068 ifa->ifa_prefixlen = 8; 1072 ifa->ifa_prefixlen = 8;
@@ -1170,38 +1174,54 @@ nla_put_failure:
1170static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) 1174static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1171{ 1175{
1172 struct net *net = sock_net(skb->sk); 1176 struct net *net = sock_net(skb->sk);
1173 int idx, ip_idx; 1177 int h, s_h;
1178 int idx, s_idx;
1179 int ip_idx, s_ip_idx;
1174 struct net_device *dev; 1180 struct net_device *dev;
1175 struct in_device *in_dev; 1181 struct in_device *in_dev;
1176 struct in_ifaddr *ifa; 1182 struct in_ifaddr *ifa;
1177 int s_ip_idx, s_idx = cb->args[0]; 1183 struct hlist_head *head;
1184 struct hlist_node *node;
1178 1185
1179 s_ip_idx = ip_idx = cb->args[1]; 1186 s_h = cb->args[0];
1180 idx = 0; 1187 s_idx = idx = cb->args[1];
1181 for_each_netdev(net, dev) { 1188 s_ip_idx = ip_idx = cb->args[2];
1182 if (idx < s_idx) 1189
1183 goto cont; 1190 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1184 if (idx > s_idx) 1191 idx = 0;
1185 s_ip_idx = 0; 1192 head = &net->dev_index_head[h];
1186 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) 1193 rcu_read_lock();
1187 goto cont; 1194 hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1188 1195 if (idx < s_idx)
1189 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; 1196 goto cont;
1190 ifa = ifa->ifa_next, ip_idx++) { 1197 if (idx > s_idx)
1191 if (ip_idx < s_ip_idx) 1198 s_ip_idx = 0;
1192 continue; 1199 in_dev = __in_dev_get_rcu(dev);
1193 if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, 1200 if (!in_dev)
1201 goto cont;
1202
1203 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1204 ifa = ifa->ifa_next, ip_idx++) {
1205 if (ip_idx < s_ip_idx)
1206 continue;
1207 if (inet_fill_ifaddr(skb, ifa,
1208 NETLINK_CB(cb->skb).pid,
1194 cb->nlh->nlmsg_seq, 1209 cb->nlh->nlmsg_seq,
1195 RTM_NEWADDR, NLM_F_MULTI) <= 0) 1210 RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1196 goto done; 1211 rcu_read_unlock();
1197 } 1212 goto done;
1213 }
1214 }
1198cont: 1215cont:
1199 idx++; 1216 idx++;
1217 }
1218 rcu_read_unlock();
1200 } 1219 }
1201 1220
1202done: 1221done:
1203 cb->args[0] = idx; 1222 cb->args[0] = h;
1204 cb->args[1] = ip_idx; 1223 cb->args[1] = idx;
1224 cb->args[2] = ip_idx;
1205 1225
1206 return skb->len; 1226 return skb->len;
1207} 1227}
@@ -1239,18 +1259,18 @@ static void devinet_copy_dflt_conf(struct net *net, int i)
1239{ 1259{
1240 struct net_device *dev; 1260 struct net_device *dev;
1241 1261
1242 read_lock(&dev_base_lock); 1262 rcu_read_lock();
1243 for_each_netdev(net, dev) { 1263 for_each_netdev_rcu(net, dev) {
1244 struct in_device *in_dev; 1264 struct in_device *in_dev;
1245 rcu_read_lock(); 1265
1246 in_dev = __in_dev_get_rcu(dev); 1266 in_dev = __in_dev_get_rcu(dev);
1247 if (in_dev && !test_bit(i, in_dev->cnf.state)) 1267 if (in_dev && !test_bit(i, in_dev->cnf.state))
1248 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i]; 1268 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1249 rcu_read_unlock();
1250 } 1269 }
1251 read_unlock(&dev_base_lock); 1270 rcu_read_unlock();
1252} 1271}
1253 1272
1273/* called with RTNL locked */
1254static void inet_forward_change(struct net *net) 1274static void inet_forward_change(struct net *net)
1255{ 1275{
1256 struct net_device *dev; 1276 struct net_device *dev;
@@ -1259,7 +1279,6 @@ static void inet_forward_change(struct net *net)
1259 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on; 1279 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1260 IPV4_DEVCONF_DFLT(net, FORWARDING) = on; 1280 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1261 1281
1262 read_lock(&dev_base_lock);
1263 for_each_netdev(net, dev) { 1282 for_each_netdev(net, dev) {
1264 struct in_device *in_dev; 1283 struct in_device *in_dev;
1265 if (on) 1284 if (on)
@@ -1270,7 +1289,6 @@ static void inet_forward_change(struct net *net)
1270 IN_DEV_CONF_SET(in_dev, FORWARDING, on); 1289 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1271 rcu_read_unlock(); 1290 rcu_read_unlock();
1272 } 1291 }
1273 read_unlock(&dev_base_lock);
1274} 1292}
1275 1293
1276static int devinet_conf_proc(ctl_table *ctl, int write, 1294static int devinet_conf_proc(ctl_table *ctl, int write,
@@ -1680,8 +1698,3 @@ void __init devinet_init(void)
1680 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr); 1698 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1681} 1699}
1682 1700
1683EXPORT_SYMBOL(in_dev_finish_destroy);
1684EXPORT_SYMBOL(inet_select_addr);
1685EXPORT_SYMBOL(inetdev_by_index);
1686EXPORT_SYMBOL(register_inetaddr_notifier);
1687EXPORT_SYMBOL(unregister_inetaddr_notifier);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index aa00398be80e..816e2180bd60 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -125,7 +125,7 @@ void fib_select_default(struct net *net,
125#endif 125#endif
126 tb = fib_get_table(net, table); 126 tb = fib_get_table(net, table);
127 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) 127 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
128 tb->tb_select_default(tb, flp, res); 128 fib_table_select_default(tb, flp, res);
129} 129}
130 130
131static void fib_flush(struct net *net) 131static void fib_flush(struct net *net)
@@ -139,7 +139,7 @@ static void fib_flush(struct net *net)
139 for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 139 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
140 head = &net->ipv4.fib_table_hash[h]; 140 head = &net->ipv4.fib_table_hash[h];
141 hlist_for_each_entry(tb, node, head, tb_hlist) 141 hlist_for_each_entry(tb, node, head, tb_hlist)
142 flushed += tb->tb_flush(tb); 142 flushed += fib_table_flush(tb);
143 } 143 }
144 144
145 if (flushed) 145 if (flushed)
@@ -162,7 +162,7 @@ struct net_device * ip_dev_find(struct net *net, __be32 addr)
162#endif 162#endif
163 163
164 local_table = fib_get_table(net, RT_TABLE_LOCAL); 164 local_table = fib_get_table(net, RT_TABLE_LOCAL);
165 if (!local_table || local_table->tb_lookup(local_table, &fl, &res)) 165 if (!local_table || fib_table_lookup(local_table, &fl, &res))
166 return NULL; 166 return NULL;
167 if (res.type != RTN_LOCAL) 167 if (res.type != RTN_LOCAL)
168 goto out; 168 goto out;
@@ -200,7 +200,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net,
200 local_table = fib_get_table(net, RT_TABLE_LOCAL); 200 local_table = fib_get_table(net, RT_TABLE_LOCAL);
201 if (local_table) { 201 if (local_table) {
202 ret = RTN_UNICAST; 202 ret = RTN_UNICAST;
203 if (!local_table->tb_lookup(local_table, &fl, &res)) { 203 if (!fib_table_lookup(local_table, &fl, &res)) {
204 if (!dev || dev == res.fi->fib_dev) 204 if (!dev || dev == res.fi->fib_dev)
205 ret = res.type; 205 ret = res.type;
206 fib_res_put(&res); 206 fib_res_put(&res);
@@ -476,13 +476,13 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
476 if (cmd == SIOCDELRT) { 476 if (cmd == SIOCDELRT) {
477 tb = fib_get_table(net, cfg.fc_table); 477 tb = fib_get_table(net, cfg.fc_table);
478 if (tb) 478 if (tb)
479 err = tb->tb_delete(tb, &cfg); 479 err = fib_table_delete(tb, &cfg);
480 else 480 else
481 err = -ESRCH; 481 err = -ESRCH;
482 } else { 482 } else {
483 tb = fib_new_table(net, cfg.fc_table); 483 tb = fib_new_table(net, cfg.fc_table);
484 if (tb) 484 if (tb)
485 err = tb->tb_insert(tb, &cfg); 485 err = fib_table_insert(tb, &cfg);
486 else 486 else
487 err = -ENOBUFS; 487 err = -ENOBUFS;
488 } 488 }
@@ -597,7 +597,7 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *ar
597 goto errout; 597 goto errout;
598 } 598 }
599 599
600 err = tb->tb_delete(tb, &cfg); 600 err = fib_table_delete(tb, &cfg);
601errout: 601errout:
602 return err; 602 return err;
603} 603}
@@ -619,7 +619,7 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *ar
619 goto errout; 619 goto errout;
620 } 620 }
621 621
622 err = tb->tb_insert(tb, &cfg); 622 err = fib_table_insert(tb, &cfg);
623errout: 623errout:
624 return err; 624 return err;
625} 625}
@@ -650,7 +650,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
650 if (dumped) 650 if (dumped)
651 memset(&cb->args[2], 0, sizeof(cb->args) - 651 memset(&cb->args[2], 0, sizeof(cb->args) -
652 2 * sizeof(cb->args[0])); 652 2 * sizeof(cb->args[0]));
653 if (tb->tb_dump(tb, skb, cb) < 0) 653 if (fib_table_dump(tb, skb, cb) < 0)
654 goto out; 654 goto out;
655 dumped = 1; 655 dumped = 1;
656next: 656next:
@@ -704,9 +704,9 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad
704 cfg.fc_scope = RT_SCOPE_HOST; 704 cfg.fc_scope = RT_SCOPE_HOST;
705 705
706 if (cmd == RTM_NEWROUTE) 706 if (cmd == RTM_NEWROUTE)
707 tb->tb_insert(tb, &cfg); 707 fib_table_insert(tb, &cfg);
708 else 708 else
709 tb->tb_delete(tb, &cfg); 709 fib_table_delete(tb, &cfg);
710} 710}
711 711
712void fib_add_ifaddr(struct in_ifaddr *ifa) 712void fib_add_ifaddr(struct in_ifaddr *ifa)
@@ -835,7 +835,7 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
835 local_bh_disable(); 835 local_bh_disable();
836 836
837 frn->tb_id = tb->tb_id; 837 frn->tb_id = tb->tb_id;
838 frn->err = tb->tb_lookup(tb, &fl, &res); 838 frn->err = fib_table_lookup(tb, &fl, &res);
839 839
840 if (!frn->err) { 840 if (!frn->err) {
841 frn->prefixlen = res.prefixlen; 841 frn->prefixlen = res.prefixlen;
@@ -1012,7 +1012,7 @@ static void __net_exit ip_fib_net_exit(struct net *net)
1012 head = &net->ipv4.fib_table_hash[i]; 1012 head = &net->ipv4.fib_table_hash[i];
1013 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) { 1013 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1014 hlist_del(node); 1014 hlist_del(node);
1015 tb->tb_flush(tb); 1015 fib_table_flush(tb);
1016 kfree(tb); 1016 kfree(tb);
1017 } 1017 }
1018 } 1018 }
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index ecd39454235c..14972017b9c2 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -242,8 +242,8 @@ fn_new_zone(struct fn_hash *table, int z)
242 return fz; 242 return fz;
243} 243}
244 244
245static int 245int fib_table_lookup(struct fib_table *tb,
246fn_hash_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) 246 const struct flowi *flp, struct fib_result *res)
247{ 247{
248 int err; 248 int err;
249 struct fn_zone *fz; 249 struct fn_zone *fz;
@@ -274,8 +274,8 @@ out:
274 return err; 274 return err;
275} 275}
276 276
277static void 277void fib_table_select_default(struct fib_table *tb,
278fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) 278 const struct flowi *flp, struct fib_result *res)
279{ 279{
280 int order, last_idx; 280 int order, last_idx;
281 struct hlist_node *node; 281 struct hlist_node *node;
@@ -366,7 +366,7 @@ static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key)
366 return NULL; 366 return NULL;
367} 367}
368 368
369static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg) 369int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
370{ 370{
371 struct fn_hash *table = (struct fn_hash *) tb->tb_data; 371 struct fn_hash *table = (struct fn_hash *) tb->tb_data;
372 struct fib_node *new_f = NULL; 372 struct fib_node *new_f = NULL;
@@ -544,8 +544,7 @@ out:
544 return err; 544 return err;
545} 545}
546 546
547 547int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
548static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg)
549{ 548{
550 struct fn_hash *table = (struct fn_hash *)tb->tb_data; 549 struct fn_hash *table = (struct fn_hash *)tb->tb_data;
551 struct fib_node *f; 550 struct fib_node *f;
@@ -662,7 +661,7 @@ static int fn_flush_list(struct fn_zone *fz, int idx)
662 return found; 661 return found;
663} 662}
664 663
665static int fn_hash_flush(struct fib_table *tb) 664int fib_table_flush(struct fib_table *tb)
666{ 665{
667 struct fn_hash *table = (struct fn_hash *) tb->tb_data; 666 struct fn_hash *table = (struct fn_hash *) tb->tb_data;
668 struct fn_zone *fz; 667 struct fn_zone *fz;
@@ -743,7 +742,8 @@ fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
743 return skb->len; 742 return skb->len;
744} 743}
745 744
746static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb) 745int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
746 struct netlink_callback *cb)
747{ 747{
748 int m, s_m; 748 int m, s_m;
749 struct fn_zone *fz; 749 struct fn_zone *fz;
@@ -787,12 +787,7 @@ struct fib_table *fib_hash_table(u32 id)
787 787
788 tb->tb_id = id; 788 tb->tb_id = id;
789 tb->tb_default = -1; 789 tb->tb_default = -1;
790 tb->tb_lookup = fn_hash_lookup; 790
791 tb->tb_insert = fn_hash_insert;
792 tb->tb_delete = fn_hash_delete;
793 tb->tb_flush = fn_hash_flush;
794 tb->tb_select_default = fn_hash_select_default;
795 tb->tb_dump = fn_hash_dump;
796 memset(tb->tb_data, 0, sizeof(struct fn_hash)); 791 memset(tb->tb_data, 0, sizeof(struct fn_hash));
797 return tb; 792 return tb;
798} 793}
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 92d9d97ec5e3..835262c2b867 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -94,7 +94,7 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
94 if ((tbl = fib_get_table(rule->fr_net, rule->table)) == NULL) 94 if ((tbl = fib_get_table(rule->fr_net, rule->table)) == NULL)
95 goto errout; 95 goto errout;
96 96
97 err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result); 97 err = fib_table_lookup(tbl, flp, (struct fib_result *) arg->result);
98 if (err > 0) 98 if (err > 0)
99 err = -EAGAIN; 99 err = -EAGAIN;
100errout: 100errout:
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 291bdf50a21f..af5d89792860 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1174,7 +1174,7 @@ done:
1174/* 1174/*
1175 * Caller must hold RTNL. 1175 * Caller must hold RTNL.
1176 */ 1176 */
1177static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) 1177int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
1178{ 1178{
1179 struct trie *t = (struct trie *) tb->tb_data; 1179 struct trie *t = (struct trie *) tb->tb_data;
1180 struct fib_alias *fa, *new_fa; 1180 struct fib_alias *fa, *new_fa;
@@ -1373,8 +1373,8 @@ static int check_leaf(struct trie *t, struct leaf *l,
1373 return 1; 1373 return 1;
1374} 1374}
1375 1375
1376static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, 1376int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
1377 struct fib_result *res) 1377 struct fib_result *res)
1378{ 1378{
1379 struct trie *t = (struct trie *) tb->tb_data; 1379 struct trie *t = (struct trie *) tb->tb_data;
1380 int ret; 1380 int ret;
@@ -1595,7 +1595,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l)
1595/* 1595/*
1596 * Caller must hold RTNL. 1596 * Caller must hold RTNL.
1597 */ 1597 */
1598static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg) 1598int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
1599{ 1599{
1600 struct trie *t = (struct trie *) tb->tb_data; 1600 struct trie *t = (struct trie *) tb->tb_data;
1601 u32 key, mask; 1601 u32 key, mask;
@@ -1786,7 +1786,7 @@ static struct leaf *trie_leafindex(struct trie *t, int index)
1786/* 1786/*
1787 * Caller must hold RTNL. 1787 * Caller must hold RTNL.
1788 */ 1788 */
1789static int fn_trie_flush(struct fib_table *tb) 1789int fib_table_flush(struct fib_table *tb)
1790{ 1790{
1791 struct trie *t = (struct trie *) tb->tb_data; 1791 struct trie *t = (struct trie *) tb->tb_data;
1792 struct leaf *l, *ll = NULL; 1792 struct leaf *l, *ll = NULL;
@@ -1807,9 +1807,9 @@ static int fn_trie_flush(struct fib_table *tb)
1807 return found; 1807 return found;
1808} 1808}
1809 1809
1810static void fn_trie_select_default(struct fib_table *tb, 1810void fib_table_select_default(struct fib_table *tb,
1811 const struct flowi *flp, 1811 const struct flowi *flp,
1812 struct fib_result *res) 1812 struct fib_result *res)
1813{ 1813{
1814 struct trie *t = (struct trie *) tb->tb_data; 1814 struct trie *t = (struct trie *) tb->tb_data;
1815 int order, last_idx; 1815 int order, last_idx;
@@ -1952,8 +1952,8 @@ static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb,
1952 return skb->len; 1952 return skb->len;
1953} 1953}
1954 1954
1955static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, 1955int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
1956 struct netlink_callback *cb) 1956 struct netlink_callback *cb)
1957{ 1957{
1958 struct leaf *l; 1958 struct leaf *l;
1959 struct trie *t = (struct trie *) tb->tb_data; 1959 struct trie *t = (struct trie *) tb->tb_data;
@@ -2020,12 +2020,6 @@ struct fib_table *fib_hash_table(u32 id)
2020 2020
2021 tb->tb_id = id; 2021 tb->tb_id = id;
2022 tb->tb_default = -1; 2022 tb->tb_default = -1;
2023 tb->tb_lookup = fn_trie_lookup;
2024 tb->tb_insert = fn_trie_insert;
2025 tb->tb_delete = fn_trie_delete;
2026 tb->tb_flush = fn_trie_flush;
2027 tb->tb_select_default = fn_trie_select_default;
2028 tb->tb_dump = fn_trie_dump;
2029 2023
2030 t = (struct trie *) tb->tb_data; 2024 t = (struct trie *) tb->tb_data;
2031 memset(t, 0, sizeof(*t)); 2025 memset(t, 0, sizeof(*t));
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 5bc13fe816d1..fe11f60ce41b 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -501,15 +501,16 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
501 if (!(rt->rt_flags & RTCF_LOCAL)) { 501 if (!(rt->rt_flags & RTCF_LOCAL)) {
502 struct net_device *dev = NULL; 502 struct net_device *dev = NULL;
503 503
504 rcu_read_lock();
504 if (rt->fl.iif && 505 if (rt->fl.iif &&
505 net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr) 506 net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)
506 dev = dev_get_by_index(net, rt->fl.iif); 507 dev = dev_get_by_index_rcu(net, rt->fl.iif);
507 508
508 if (dev) { 509 if (dev)
509 saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK); 510 saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK);
510 dev_put(dev); 511 else
511 } else
512 saddr = 0; 512 saddr = 0;
513 rcu_read_unlock();
513 } 514 }
514 515
515 tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) | 516 tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) |
@@ -1165,6 +1166,10 @@ static int __net_init icmp_sk_init(struct net *net)
1165 sk->sk_sndbuf = 1166 sk->sk_sndbuf =
1166 (2 * ((64 * 1024) + sizeof(struct sk_buff))); 1167 (2 * ((64 * 1024) + sizeof(struct sk_buff)));
1167 1168
1169 /*
1170 * Speedup sock_wfree()
1171 */
1172 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1168 inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT; 1173 inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT;
1169 } 1174 }
1170 1175
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index d41e5de79a82..6110c6d6e613 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2311,9 +2311,10 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq)
2311 struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); 2311 struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
2312 2312
2313 state->in_dev = NULL; 2313 state->in_dev = NULL;
2314 for_each_netdev(net, state->dev) { 2314 for_each_netdev_rcu(net, state->dev) {
2315 struct in_device *in_dev; 2315 struct in_device *in_dev;
2316 in_dev = in_dev_get(state->dev); 2316
2317 in_dev = __in_dev_get_rcu(state->dev);
2317 if (!in_dev) 2318 if (!in_dev)
2318 continue; 2319 continue;
2319 read_lock(&in_dev->mc_list_lock); 2320 read_lock(&in_dev->mc_list_lock);
@@ -2323,7 +2324,6 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq)
2323 break; 2324 break;
2324 } 2325 }
2325 read_unlock(&in_dev->mc_list_lock); 2326 read_unlock(&in_dev->mc_list_lock);
2326 in_dev_put(in_dev);
2327 } 2327 }
2328 return im; 2328 return im;
2329} 2329}
@@ -2333,16 +2333,15 @@ static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_li
2333 struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); 2333 struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
2334 im = im->next; 2334 im = im->next;
2335 while (!im) { 2335 while (!im) {
2336 if (likely(state->in_dev != NULL)) { 2336 if (likely(state->in_dev != NULL))
2337 read_unlock(&state->in_dev->mc_list_lock); 2337 read_unlock(&state->in_dev->mc_list_lock);
2338 in_dev_put(state->in_dev); 2338
2339 } 2339 state->dev = next_net_device_rcu(state->dev);
2340 state->dev = next_net_device(state->dev);
2341 if (!state->dev) { 2340 if (!state->dev) {
2342 state->in_dev = NULL; 2341 state->in_dev = NULL;
2343 break; 2342 break;
2344 } 2343 }
2345 state->in_dev = in_dev_get(state->dev); 2344 state->in_dev = __in_dev_get_rcu(state->dev);
2346 if (!state->in_dev) 2345 if (!state->in_dev)
2347 continue; 2346 continue;
2348 read_lock(&state->in_dev->mc_list_lock); 2347 read_lock(&state->in_dev->mc_list_lock);
@@ -2361,9 +2360,9 @@ static struct ip_mc_list *igmp_mc_get_idx(struct seq_file *seq, loff_t pos)
2361} 2360}
2362 2361
2363static void *igmp_mc_seq_start(struct seq_file *seq, loff_t *pos) 2362static void *igmp_mc_seq_start(struct seq_file *seq, loff_t *pos)
2364 __acquires(dev_base_lock) 2363 __acquires(rcu)
2365{ 2364{
2366 read_lock(&dev_base_lock); 2365 rcu_read_lock();
2367 return *pos ? igmp_mc_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 2366 return *pos ? igmp_mc_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2368} 2367}
2369 2368
@@ -2379,16 +2378,15 @@ static void *igmp_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2379} 2378}
2380 2379
2381static void igmp_mc_seq_stop(struct seq_file *seq, void *v) 2380static void igmp_mc_seq_stop(struct seq_file *seq, void *v)
2382 __releases(dev_base_lock) 2381 __releases(rcu)
2383{ 2382{
2384 struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); 2383 struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
2385 if (likely(state->in_dev != NULL)) { 2384 if (likely(state->in_dev != NULL)) {
2386 read_unlock(&state->in_dev->mc_list_lock); 2385 read_unlock(&state->in_dev->mc_list_lock);
2387 in_dev_put(state->in_dev);
2388 state->in_dev = NULL; 2386 state->in_dev = NULL;
2389 } 2387 }
2390 state->dev = NULL; 2388 state->dev = NULL;
2391 read_unlock(&dev_base_lock); 2389 rcu_read_unlock();
2392} 2390}
2393 2391
2394static int igmp_mc_seq_show(struct seq_file *seq, void *v) 2392static int igmp_mc_seq_show(struct seq_file *seq, void *v)
@@ -2462,9 +2460,9 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq)
2462 2460
2463 state->idev = NULL; 2461 state->idev = NULL;
2464 state->im = NULL; 2462 state->im = NULL;
2465 for_each_netdev(net, state->dev) { 2463 for_each_netdev_rcu(net, state->dev) {
2466 struct in_device *idev; 2464 struct in_device *idev;
2467 idev = in_dev_get(state->dev); 2465 idev = __in_dev_get_rcu(state->dev);
2468 if (unlikely(idev == NULL)) 2466 if (unlikely(idev == NULL))
2469 continue; 2467 continue;
2470 read_lock(&idev->mc_list_lock); 2468 read_lock(&idev->mc_list_lock);
@@ -2480,7 +2478,6 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq)
2480 spin_unlock_bh(&im->lock); 2478 spin_unlock_bh(&im->lock);
2481 } 2479 }
2482 read_unlock(&idev->mc_list_lock); 2480 read_unlock(&idev->mc_list_lock);
2483 in_dev_put(idev);
2484 } 2481 }
2485 return psf; 2482 return psf;
2486} 2483}
@@ -2494,16 +2491,15 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l
2494 spin_unlock_bh(&state->im->lock); 2491 spin_unlock_bh(&state->im->lock);
2495 state->im = state->im->next; 2492 state->im = state->im->next;
2496 while (!state->im) { 2493 while (!state->im) {
2497 if (likely(state->idev != NULL)) { 2494 if (likely(state->idev != NULL))
2498 read_unlock(&state->idev->mc_list_lock); 2495 read_unlock(&state->idev->mc_list_lock);
2499 in_dev_put(state->idev); 2496
2500 } 2497 state->dev = next_net_device_rcu(state->dev);
2501 state->dev = next_net_device(state->dev);
2502 if (!state->dev) { 2498 if (!state->dev) {
2503 state->idev = NULL; 2499 state->idev = NULL;
2504 goto out; 2500 goto out;
2505 } 2501 }
2506 state->idev = in_dev_get(state->dev); 2502 state->idev = __in_dev_get_rcu(state->dev);
2507 if (!state->idev) 2503 if (!state->idev)
2508 continue; 2504 continue;
2509 read_lock(&state->idev->mc_list_lock); 2505 read_lock(&state->idev->mc_list_lock);
@@ -2528,8 +2524,9 @@ static struct ip_sf_list *igmp_mcf_get_idx(struct seq_file *seq, loff_t pos)
2528} 2524}
2529 2525
2530static void *igmp_mcf_seq_start(struct seq_file *seq, loff_t *pos) 2526static void *igmp_mcf_seq_start(struct seq_file *seq, loff_t *pos)
2527 __acquires(rcu)
2531{ 2528{
2532 read_lock(&dev_base_lock); 2529 rcu_read_lock();
2533 return *pos ? igmp_mcf_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 2530 return *pos ? igmp_mcf_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2534} 2531}
2535 2532
@@ -2545,6 +2542,7 @@ static void *igmp_mcf_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2545} 2542}
2546 2543
2547static void igmp_mcf_seq_stop(struct seq_file *seq, void *v) 2544static void igmp_mcf_seq_stop(struct seq_file *seq, void *v)
2545 __releases(rcu)
2548{ 2546{
2549 struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq); 2547 struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq);
2550 if (likely(state->im != NULL)) { 2548 if (likely(state->im != NULL)) {
@@ -2553,11 +2551,10 @@ static void igmp_mcf_seq_stop(struct seq_file *seq, void *v)
2553 } 2551 }
2554 if (likely(state->idev != NULL)) { 2552 if (likely(state->idev != NULL)) {
2555 read_unlock(&state->idev->mc_list_lock); 2553 read_unlock(&state->idev->mc_list_lock);
2556 in_dev_put(state->idev);
2557 state->idev = NULL; 2554 state->idev = NULL;
2558 } 2555 }
2559 state->dev = NULL; 2556 state->dev = NULL;
2560 read_unlock(&dev_base_lock); 2557 rcu_read_unlock();
2561} 2558}
2562 2559
2563static int igmp_mcf_seq_show(struct seq_file *seq, void *v) 2560static int igmp_mcf_seq_show(struct seq_file *seq, void *v)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 537731b3bcb3..26fb50e91311 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -358,6 +358,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
358 const struct inet_request_sock *ireq = inet_rsk(req); 358 const struct inet_request_sock *ireq = inet_rsk(req);
359 struct ip_options *opt = inet_rsk(req)->opt; 359 struct ip_options *opt = inet_rsk(req)->opt;
360 struct flowi fl = { .oif = sk->sk_bound_dev_if, 360 struct flowi fl = { .oif = sk->sk_bound_dev_if,
361 .mark = sk->sk_mark,
361 .nl_u = { .ip4_u = 362 .nl_u = { .ip4_u =
362 { .daddr = ((opt && opt->srr) ? 363 { .daddr = ((opt && opt->srr) ?
363 opt->faddr : 364 opt->faddr :
@@ -367,7 +368,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
367 .proto = sk->sk_protocol, 368 .proto = sk->sk_protocol,
368 .flags = inet_sk_flowi_flags(sk), 369 .flags = inet_sk_flowi_flags(sk),
369 .uli_u = { .ports = 370 .uli_u = { .ports =
370 { .sport = inet_sk(sk)->sport, 371 { .sport = inet_sk(sk)->inet_sport,
371 .dport = ireq->rmt_port } } }; 372 .dport = ireq->rmt_port } } };
372 struct net *net = sock_net(sk); 373 struct net *net = sock_net(sk);
373 374
@@ -574,9 +575,9 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
574 newsk->sk_state = TCP_SYN_RECV; 575 newsk->sk_state = TCP_SYN_RECV;
575 newicsk->icsk_bind_hash = NULL; 576 newicsk->icsk_bind_hash = NULL;
576 577
577 inet_sk(newsk)->dport = inet_rsk(req)->rmt_port; 578 inet_sk(newsk)->inet_dport = inet_rsk(req)->rmt_port;
578 inet_sk(newsk)->num = ntohs(inet_rsk(req)->loc_port); 579 inet_sk(newsk)->inet_num = ntohs(inet_rsk(req)->loc_port);
579 inet_sk(newsk)->sport = inet_rsk(req)->loc_port; 580 inet_sk(newsk)->inet_sport = inet_rsk(req)->loc_port;
580 newsk->sk_write_space = sk_stream_write_space; 581 newsk->sk_write_space = sk_stream_write_space;
581 582
582 newicsk->icsk_retransmits = 0; 583 newicsk->icsk_retransmits = 0;
@@ -607,8 +608,8 @@ void inet_csk_destroy_sock(struct sock *sk)
607 /* It cannot be in hash table! */ 608 /* It cannot be in hash table! */
608 WARN_ON(!sk_unhashed(sk)); 609 WARN_ON(!sk_unhashed(sk));
609 610
610 /* If it has not 0 inet_sk(sk)->num, it must be bound */ 611 /* If it has not 0 inet_sk(sk)->inet_num, it must be bound */
611 WARN_ON(inet_sk(sk)->num && !inet_csk(sk)->icsk_bind_hash); 612 WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash);
612 613
613 sk->sk_prot->destroy(sk); 614 sk->sk_prot->destroy(sk);
614 615
@@ -643,8 +644,8 @@ int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
643 * after validation is complete. 644 * after validation is complete.
644 */ 645 */
645 sk->sk_state = TCP_LISTEN; 646 sk->sk_state = TCP_LISTEN;
646 if (!sk->sk_prot->get_port(sk, inet->num)) { 647 if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
647 inet->sport = htons(inet->num); 648 inet->inet_sport = htons(inet->inet_num);
648 649
649 sk_dst_reset(sk); 650 sk_dst_reset(sk);
650 sk->sk_prot->hash(sk); 651 sk->sk_prot->hash(sk);
@@ -720,8 +721,8 @@ void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
720 const struct inet_sock *inet = inet_sk(sk); 721 const struct inet_sock *inet = inet_sk(sk);
721 722
722 sin->sin_family = AF_INET; 723 sin->sin_family = AF_INET;
723 sin->sin_addr.s_addr = inet->daddr; 724 sin->sin_addr.s_addr = inet->inet_daddr;
724 sin->sin_port = inet->dport; 725 sin->sin_port = inet->inet_dport;
725} 726}
726 727
727EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); 728EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index a706a47f4dbb..bdb78dd180ce 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -116,10 +116,10 @@ static int inet_csk_diag_fill(struct sock *sk,
116 r->id.idiag_cookie[0] = (u32)(unsigned long)sk; 116 r->id.idiag_cookie[0] = (u32)(unsigned long)sk;
117 r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); 117 r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);
118 118
119 r->id.idiag_sport = inet->sport; 119 r->id.idiag_sport = inet->inet_sport;
120 r->id.idiag_dport = inet->dport; 120 r->id.idiag_dport = inet->inet_dport;
121 r->id.idiag_src[0] = inet->rcv_saddr; 121 r->id.idiag_src[0] = inet->inet_rcv_saddr;
122 r->id.idiag_dst[0] = inet->daddr; 122 r->id.idiag_dst[0] = inet->inet_daddr;
123 123
124#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 124#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
125 if (r->idiag_family == AF_INET6) { 125 if (r->idiag_family == AF_INET6) {
@@ -504,11 +504,11 @@ static int inet_csk_diag_dump(struct sock *sk,
504 } else 504 } else
505#endif 505#endif
506 { 506 {
507 entry.saddr = &inet->rcv_saddr; 507 entry.saddr = &inet->inet_rcv_saddr;
508 entry.daddr = &inet->daddr; 508 entry.daddr = &inet->inet_daddr;
509 } 509 }
510 entry.sport = inet->num; 510 entry.sport = inet->inet_num;
511 entry.dport = ntohs(inet->dport); 511 entry.dport = ntohs(inet->inet_dport);
512 entry.userlocks = sk->sk_userlocks; 512 entry.userlocks = sk->sk_userlocks;
513 513
514 if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry)) 514 if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry))
@@ -584,7 +584,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
584 if (tmo < 0) 584 if (tmo < 0)
585 tmo = 0; 585 tmo = 0;
586 586
587 r->id.idiag_sport = inet->sport; 587 r->id.idiag_sport = inet->inet_sport;
588 r->id.idiag_dport = ireq->rmt_port; 588 r->id.idiag_dport = ireq->rmt_port;
589 r->id.idiag_src[0] = ireq->loc_addr; 589 r->id.idiag_src[0] = ireq->loc_addr;
590 r->id.idiag_dst[0] = ireq->rmt_addr; 590 r->id.idiag_dst[0] = ireq->rmt_addr;
@@ -639,7 +639,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
639 639
640 if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { 640 if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
641 bc = (struct rtattr *)(r + 1); 641 bc = (struct rtattr *)(r + 1);
642 entry.sport = inet->num; 642 entry.sport = inet->inet_num;
643 entry.userlocks = sk->sk_userlocks; 643 entry.userlocks = sk->sk_userlocks;
644 } 644 }
645 645
@@ -732,7 +732,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
732 continue; 732 continue;
733 } 733 }
734 734
735 if (r->id.idiag_sport != inet->sport && 735 if (r->id.idiag_sport != inet->inet_sport &&
736 r->id.idiag_sport) 736 r->id.idiag_sport)
737 goto next_listen; 737 goto next_listen;
738 738
@@ -774,7 +774,7 @@ skip_listen_ht:
774 if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV))) 774 if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV)))
775 goto unlock; 775 goto unlock;
776 776
777 for (i = s_i; i < hashinfo->ehash_size; i++) { 777 for (i = s_i; i <= hashinfo->ehash_mask; i++) {
778 struct inet_ehash_bucket *head = &hashinfo->ehash[i]; 778 struct inet_ehash_bucket *head = &hashinfo->ehash[i];
779 spinlock_t *lock = inet_ehash_lockp(hashinfo, i); 779 spinlock_t *lock = inet_ehash_lockp(hashinfo, i);
780 struct sock *sk; 780 struct sock *sk;
@@ -797,10 +797,10 @@ skip_listen_ht:
797 goto next_normal; 797 goto next_normal;
798 if (!(r->idiag_states & (1 << sk->sk_state))) 798 if (!(r->idiag_states & (1 << sk->sk_state)))
799 goto next_normal; 799 goto next_normal;
800 if (r->id.idiag_sport != inet->sport && 800 if (r->id.idiag_sport != inet->inet_sport &&
801 r->id.idiag_sport) 801 r->id.idiag_sport)
802 goto next_normal; 802 goto next_normal;
803 if (r->id.idiag_dport != inet->dport && 803 if (r->id.idiag_dport != inet->inet_dport &&
804 r->id.idiag_dport) 804 r->id.idiag_dport)
805 goto next_normal; 805 goto next_normal;
806 if (inet_csk_diag_dump(sk, skb, cb) < 0) { 806 if (inet_csk_diag_dump(sk, skb, cb) < 0) {
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 625cc5f64c94..47ad7aab51e3 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -64,7 +64,7 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
64 64
65 atomic_inc(&hashinfo->bsockets); 65 atomic_inc(&hashinfo->bsockets);
66 66
67 inet_sk(sk)->num = snum; 67 inet_sk(sk)->inet_num = snum;
68 sk_add_bind_node(sk, &tb->owners); 68 sk_add_bind_node(sk, &tb->owners);
69 tb->num_owners++; 69 tb->num_owners++;
70 inet_csk(sk)->icsk_bind_hash = tb; 70 inet_csk(sk)->icsk_bind_hash = tb;
@@ -76,7 +76,7 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
76static void __inet_put_port(struct sock *sk) 76static void __inet_put_port(struct sock *sk)
77{ 77{
78 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; 78 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
79 const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->num, 79 const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num,
80 hashinfo->bhash_size); 80 hashinfo->bhash_size);
81 struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; 81 struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
82 struct inet_bind_bucket *tb; 82 struct inet_bind_bucket *tb;
@@ -88,7 +88,7 @@ static void __inet_put_port(struct sock *sk)
88 __sk_del_bind_node(sk); 88 __sk_del_bind_node(sk);
89 tb->num_owners--; 89 tb->num_owners--;
90 inet_csk(sk)->icsk_bind_hash = NULL; 90 inet_csk(sk)->icsk_bind_hash = NULL;
91 inet_sk(sk)->num = 0; 91 inet_sk(sk)->inet_num = 0;
92 inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); 92 inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
93 spin_unlock(&head->lock); 93 spin_unlock(&head->lock);
94} 94}
@@ -105,7 +105,7 @@ EXPORT_SYMBOL(inet_put_port);
105void __inet_inherit_port(struct sock *sk, struct sock *child) 105void __inet_inherit_port(struct sock *sk, struct sock *child)
106{ 106{
107 struct inet_hashinfo *table = sk->sk_prot->h.hashinfo; 107 struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
108 const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->num, 108 const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->inet_num,
109 table->bhash_size); 109 table->bhash_size);
110 struct inet_bind_hashbucket *head = &table->bhash[bhash]; 110 struct inet_bind_hashbucket *head = &table->bhash[bhash];
111 struct inet_bind_bucket *tb; 111 struct inet_bind_bucket *tb;
@@ -126,9 +126,9 @@ static inline int compute_score(struct sock *sk, struct net *net,
126 int score = -1; 126 int score = -1;
127 struct inet_sock *inet = inet_sk(sk); 127 struct inet_sock *inet = inet_sk(sk);
128 128
129 if (net_eq(sock_net(sk), net) && inet->num == hnum && 129 if (net_eq(sock_net(sk), net) && inet->inet_num == hnum &&
130 !ipv6_only_sock(sk)) { 130 !ipv6_only_sock(sk)) {
131 __be32 rcv_saddr = inet->rcv_saddr; 131 __be32 rcv_saddr = inet->inet_rcv_saddr;
132 score = sk->sk_family == PF_INET ? 1 : 0; 132 score = sk->sk_family == PF_INET ? 1 : 0;
133 if (rcv_saddr) { 133 if (rcv_saddr) {
134 if (rcv_saddr != daddr) 134 if (rcv_saddr != daddr)
@@ -209,7 +209,7 @@ struct sock * __inet_lookup_established(struct net *net,
209 * have wildcards anyways. 209 * have wildcards anyways.
210 */ 210 */
211 unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport); 211 unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport);
212 unsigned int slot = hash & (hashinfo->ehash_size - 1); 212 unsigned int slot = hash & hashinfo->ehash_mask;
213 struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; 213 struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
214 214
215 rcu_read_lock(); 215 rcu_read_lock();
@@ -273,13 +273,14 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
273{ 273{
274 struct inet_hashinfo *hinfo = death_row->hashinfo; 274 struct inet_hashinfo *hinfo = death_row->hashinfo;
275 struct inet_sock *inet = inet_sk(sk); 275 struct inet_sock *inet = inet_sk(sk);
276 __be32 daddr = inet->rcv_saddr; 276 __be32 daddr = inet->inet_rcv_saddr;
277 __be32 saddr = inet->daddr; 277 __be32 saddr = inet->inet_daddr;
278 int dif = sk->sk_bound_dev_if; 278 int dif = sk->sk_bound_dev_if;
279 INET_ADDR_COOKIE(acookie, saddr, daddr) 279 INET_ADDR_COOKIE(acookie, saddr, daddr)
280 const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); 280 const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
281 struct net *net = sock_net(sk); 281 struct net *net = sock_net(sk);
282 unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport); 282 unsigned int hash = inet_ehashfn(net, daddr, lport,
283 saddr, inet->inet_dport);
283 struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); 284 struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
284 spinlock_t *lock = inet_ehash_lockp(hinfo, hash); 285 spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
285 struct sock *sk2; 286 struct sock *sk2;
@@ -312,8 +313,8 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
312unique: 313unique:
313 /* Must record num and sport now. Otherwise we will see 314 /* Must record num and sport now. Otherwise we will see
314 * in hash table socket with a funny identity. */ 315 * in hash table socket with a funny identity. */
315 inet->num = lport; 316 inet->inet_num = lport;
316 inet->sport = htons(lport); 317 inet->inet_sport = htons(lport);
317 sk->sk_hash = hash; 318 sk->sk_hash = hash;
318 WARN_ON(!sk_unhashed(sk)); 319 WARN_ON(!sk_unhashed(sk));
319 __sk_nulls_add_node_rcu(sk, &head->chain); 320 __sk_nulls_add_node_rcu(sk, &head->chain);
@@ -341,8 +342,9 @@ not_unique:
341static inline u32 inet_sk_port_offset(const struct sock *sk) 342static inline u32 inet_sk_port_offset(const struct sock *sk)
342{ 343{
343 const struct inet_sock *inet = inet_sk(sk); 344 const struct inet_sock *inet = inet_sk(sk);
344 return secure_ipv4_port_ephemeral(inet->rcv_saddr, inet->daddr, 345 return secure_ipv4_port_ephemeral(inet->inet_rcv_saddr,
345 inet->dport); 346 inet->inet_daddr,
347 inet->inet_dport);
346} 348}
347 349
348void __inet_hash_nolisten(struct sock *sk) 350void __inet_hash_nolisten(struct sock *sk)
@@ -424,7 +426,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
424 void (*hash)(struct sock *sk)) 426 void (*hash)(struct sock *sk))
425{ 427{
426 struct inet_hashinfo *hinfo = death_row->hashinfo; 428 struct inet_hashinfo *hinfo = death_row->hashinfo;
427 const unsigned short snum = inet_sk(sk)->num; 429 const unsigned short snum = inet_sk(sk)->inet_num;
428 struct inet_bind_hashbucket *head; 430 struct inet_bind_hashbucket *head;
429 struct inet_bind_bucket *tb; 431 struct inet_bind_bucket *tb;
430 int ret; 432 int ret;
@@ -485,7 +487,7 @@ ok:
485 /* Head lock still held and bh's disabled */ 487 /* Head lock still held and bh's disabled */
486 inet_bind_hash(sk, tb, port); 488 inet_bind_hash(sk, tb, port);
487 if (sk_unhashed(sk)) { 489 if (sk_unhashed(sk)) {
488 inet_sk(sk)->sport = htons(port); 490 inet_sk(sk)->inet_sport = htons(port);
489 hash(sk); 491 hash(sk);
490 } 492 }
491 spin_unlock(&head->lock); 493 spin_unlock(&head->lock);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 13f0781f35cd..1f5d508bb18b 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -86,7 +86,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
86 Note, that any socket with inet->num != 0 MUST be bound in 86 Note, that any socket with inet->num != 0 MUST be bound in
87 binding cache, even if it is closed. 87 binding cache, even if it is closed.
88 */ 88 */
89 bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->num, 89 bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
90 hashinfo->bhash_size)]; 90 hashinfo->bhash_size)];
91 spin_lock(&bhead->lock); 91 spin_lock(&bhead->lock);
92 tw->tw_tb = icsk->icsk_bind_hash; 92 tw->tw_tb = icsk->icsk_bind_hash;
@@ -124,14 +124,14 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
124 kmemcheck_annotate_bitfield(tw, flags); 124 kmemcheck_annotate_bitfield(tw, flags);
125 125
126 /* Give us an identity. */ 126 /* Give us an identity. */
127 tw->tw_daddr = inet->daddr; 127 tw->tw_daddr = inet->inet_daddr;
128 tw->tw_rcv_saddr = inet->rcv_saddr; 128 tw->tw_rcv_saddr = inet->inet_rcv_saddr;
129 tw->tw_bound_dev_if = sk->sk_bound_dev_if; 129 tw->tw_bound_dev_if = sk->sk_bound_dev_if;
130 tw->tw_num = inet->num; 130 tw->tw_num = inet->inet_num;
131 tw->tw_state = TCP_TIME_WAIT; 131 tw->tw_state = TCP_TIME_WAIT;
132 tw->tw_substate = state; 132 tw->tw_substate = state;
133 tw->tw_sport = inet->sport; 133 tw->tw_sport = inet->inet_sport;
134 tw->tw_dport = inet->dport; 134 tw->tw_dport = inet->inet_dport;
135 tw->tw_family = sk->sk_family; 135 tw->tw_family = sk->sk_family;
136 tw->tw_reuse = sk->sk_reuse; 136 tw->tw_reuse = sk->sk_reuse;
137 tw->tw_hash = sk->sk_hash; 137 tw->tw_hash = sk->sk_hash;
@@ -430,7 +430,7 @@ void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo,
430 int h; 430 int h;
431 431
432 local_bh_disable(); 432 local_bh_disable();
433 for (h = 0; h < (hashinfo->ehash_size); h++) { 433 for (h = 0; h <= hashinfo->ehash_mask; h++) {
434 struct inet_ehash_bucket *head = 434 struct inet_ehash_bucket *head =
435 inet_ehash_bucket(hashinfo, h); 435 inet_ehash_bucket(hashinfo, h);
436 spinlock_t *lock = inet_ehash_lockp(hashinfo, h); 436 spinlock_t *lock = inet_ehash_lockp(hashinfo, h);
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index b1fbe18feb5a..6bcfe52a9c87 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -67,9 +67,6 @@
67 * ip_id_count: idlock 67 * ip_id_count: idlock
68 */ 68 */
69 69
70/* Exported for inet_getid inline function. */
71DEFINE_SPINLOCK(inet_peer_idlock);
72
73static struct kmem_cache *peer_cachep __read_mostly; 70static struct kmem_cache *peer_cachep __read_mostly;
74 71
75#define node_height(x) x->avl_height 72#define node_height(x) x->avl_height
@@ -390,7 +387,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create)
390 n->v4daddr = daddr; 387 n->v4daddr = daddr;
391 atomic_set(&n->refcnt, 1); 388 atomic_set(&n->refcnt, 1);
392 atomic_set(&n->rid, 0); 389 atomic_set(&n->rid, 0);
393 n->ip_id_count = secure_ip_id(daddr); 390 atomic_set(&n->ip_id_count, secure_ip_id(daddr));
394 n->tcp_ts_stamp = 0; 391 n->tcp_ts_stamp = 0;
395 392
396 write_lock_bh(&peer_pool_lock); 393 write_lock_bh(&peer_pool_lock);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 575f9bd51ccd..b007f8af6e1f 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -206,10 +206,11 @@ static void ip_expire(unsigned long arg)
206 struct sk_buff *head = qp->q.fragments; 206 struct sk_buff *head = qp->q.fragments;
207 207
208 /* Send an ICMP "Fragment Reassembly Timeout" message. */ 208 /* Send an ICMP "Fragment Reassembly Timeout" message. */
209 if ((head->dev = dev_get_by_index(net, qp->iif)) != NULL) { 209 rcu_read_lock();
210 head->dev = dev_get_by_index_rcu(net, qp->iif);
211 if (head->dev)
210 icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); 212 icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
211 dev_put(head->dev); 213 rcu_read_unlock();
212 }
213 } 214 }
214out: 215out:
215 spin_unlock(&qp->q.lock); 216 spin_unlock(&qp->q.lock);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 143333852624..a7de9e3a8f18 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -156,8 +156,13 @@ struct ipgre_net {
156#define tunnels_r tunnels[2] 156#define tunnels_r tunnels[2]
157#define tunnels_l tunnels[1] 157#define tunnels_l tunnels[1]
158#define tunnels_wc tunnels[0] 158#define tunnels_wc tunnels[0]
159/*
160 * Locking : hash tables are protected by RCU and a spinlock
161 */
162static DEFINE_SPINLOCK(ipgre_lock);
159 163
160static DEFINE_RWLOCK(ipgre_lock); 164#define for_each_ip_tunnel_rcu(start) \
165 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
161 166
162/* Given src, dst and key, find appropriate for input tunnel. */ 167/* Given src, dst and key, find appropriate for input tunnel. */
163 168
@@ -175,7 +180,7 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
175 ARPHRD_ETHER : ARPHRD_IPGRE; 180 ARPHRD_ETHER : ARPHRD_IPGRE;
176 int score, cand_score = 4; 181 int score, cand_score = 4;
177 182
178 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) { 183 for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
179 if (local != t->parms.iph.saddr || 184 if (local != t->parms.iph.saddr ||
180 remote != t->parms.iph.daddr || 185 remote != t->parms.iph.daddr ||
181 key != t->parms.i_key || 186 key != t->parms.i_key ||
@@ -200,7 +205,7 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
200 } 205 }
201 } 206 }
202 207
203 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) { 208 for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
204 if (remote != t->parms.iph.daddr || 209 if (remote != t->parms.iph.daddr ||
205 key != t->parms.i_key || 210 key != t->parms.i_key ||
206 !(t->dev->flags & IFF_UP)) 211 !(t->dev->flags & IFF_UP))
@@ -224,7 +229,7 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
224 } 229 }
225 } 230 }
226 231
227 for (t = ign->tunnels_l[h1]; t; t = t->next) { 232 for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
228 if ((local != t->parms.iph.saddr && 233 if ((local != t->parms.iph.saddr &&
229 (local != t->parms.iph.daddr || 234 (local != t->parms.iph.daddr ||
230 !ipv4_is_multicast(local))) || 235 !ipv4_is_multicast(local))) ||
@@ -250,7 +255,7 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
250 } 255 }
251 } 256 }
252 257
253 for (t = ign->tunnels_wc[h1]; t; t = t->next) { 258 for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
254 if (t->parms.i_key != key || 259 if (t->parms.i_key != key ||
255 !(t->dev->flags & IFF_UP)) 260 !(t->dev->flags & IFF_UP))
256 continue; 261 continue;
@@ -276,8 +281,9 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
276 if (cand != NULL) 281 if (cand != NULL)
277 return cand; 282 return cand;
278 283
279 if (ign->fb_tunnel_dev->flags & IFF_UP) 284 dev = ign->fb_tunnel_dev;
280 return netdev_priv(ign->fb_tunnel_dev); 285 if (dev->flags & IFF_UP)
286 return netdev_priv(dev);
281 287
282 return NULL; 288 return NULL;
283} 289}
@@ -311,10 +317,10 @@ static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
311{ 317{
312 struct ip_tunnel **tp = ipgre_bucket(ign, t); 318 struct ip_tunnel **tp = ipgre_bucket(ign, t);
313 319
320 spin_lock_bh(&ipgre_lock);
314 t->next = *tp; 321 t->next = *tp;
315 write_lock_bh(&ipgre_lock); 322 rcu_assign_pointer(*tp, t);
316 *tp = t; 323 spin_unlock_bh(&ipgre_lock);
317 write_unlock_bh(&ipgre_lock);
318} 324}
319 325
320static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) 326static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
@@ -323,9 +329,9 @@ static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
323 329
324 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) { 330 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
325 if (t == *tp) { 331 if (t == *tp) {
326 write_lock_bh(&ipgre_lock); 332 spin_lock_bh(&ipgre_lock);
327 *tp = t->next; 333 *tp = t->next;
328 write_unlock_bh(&ipgre_lock); 334 spin_unlock_bh(&ipgre_lock);
329 break; 335 break;
330 } 336 }
331 } 337 }
@@ -476,7 +482,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
476 break; 482 break;
477 } 483 }
478 484
479 read_lock(&ipgre_lock); 485 rcu_read_lock();
480 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr, 486 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
481 flags & GRE_KEY ? 487 flags & GRE_KEY ?
482 *(((__be32 *)p) + (grehlen / 4) - 1) : 0, 488 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
@@ -494,7 +500,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
494 t->err_count = 1; 500 t->err_count = 1;
495 t->err_time = jiffies; 501 t->err_time = jiffies;
496out: 502out:
497 read_unlock(&ipgre_lock); 503 rcu_read_unlock();
498 return; 504 return;
499} 505}
500 506
@@ -573,7 +579,7 @@ static int ipgre_rcv(struct sk_buff *skb)
573 579
574 gre_proto = *(__be16 *)(h + 2); 580 gre_proto = *(__be16 *)(h + 2);
575 581
576 read_lock(&ipgre_lock); 582 rcu_read_lock();
577 if ((tunnel = ipgre_tunnel_lookup(skb->dev, 583 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
578 iph->saddr, iph->daddr, key, 584 iph->saddr, iph->daddr, key,
579 gre_proto))) { 585 gre_proto))) {
@@ -647,13 +653,13 @@ static int ipgre_rcv(struct sk_buff *skb)
647 ipgre_ecn_decapsulate(iph, skb); 653 ipgre_ecn_decapsulate(iph, skb);
648 654
649 netif_rx(skb); 655 netif_rx(skb);
650 read_unlock(&ipgre_lock); 656 rcu_read_unlock();
651 return(0); 657 return(0);
652 } 658 }
653 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 659 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
654 660
655drop: 661drop:
656 read_unlock(&ipgre_lock); 662 rcu_read_unlock();
657drop_nolock: 663drop_nolock:
658 kfree_skb(skb); 664 kfree_skb(skb);
659 return(0); 665 return(0);
@@ -662,7 +668,8 @@ drop_nolock:
662static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 668static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
663{ 669{
664 struct ip_tunnel *tunnel = netdev_priv(dev); 670 struct ip_tunnel *tunnel = netdev_priv(dev);
665 struct net_device_stats *stats = &tunnel->dev->stats; 671 struct net_device_stats *stats = &dev->stats;
672 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
666 struct iphdr *old_iph = ip_hdr(skb); 673 struct iphdr *old_iph = ip_hdr(skb);
667 struct iphdr *tiph; 674 struct iphdr *tiph;
668 u8 tos; 675 u8 tos;
@@ -810,7 +817,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
810 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); 817 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
811 if (!new_skb) { 818 if (!new_skb) {
812 ip_rt_put(rt); 819 ip_rt_put(rt);
813 stats->tx_dropped++; 820 txq->tx_dropped++;
814 dev_kfree_skb(skb); 821 dev_kfree_skb(skb);
815 return NETDEV_TX_OK; 822 return NETDEV_TX_OK;
816 } 823 }
@@ -1283,16 +1290,19 @@ static const struct net_protocol ipgre_protocol = {
1283 .netns_ok = 1, 1290 .netns_ok = 1,
1284}; 1291};
1285 1292
1286static void ipgre_destroy_tunnels(struct ipgre_net *ign) 1293static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
1287{ 1294{
1288 int prio; 1295 int prio;
1289 1296
1290 for (prio = 0; prio < 4; prio++) { 1297 for (prio = 0; prio < 4; prio++) {
1291 int h; 1298 int h;
1292 for (h = 0; h < HASH_SIZE; h++) { 1299 for (h = 0; h < HASH_SIZE; h++) {
1293 struct ip_tunnel *t; 1300 struct ip_tunnel *t = ign->tunnels[prio][h];
1294 while ((t = ign->tunnels[prio][h]) != NULL) 1301
1295 unregister_netdevice(t->dev); 1302 while (t != NULL) {
1303 unregister_netdevice_queue(t->dev, head);
1304 t = t->next;
1305 }
1296 } 1306 }
1297 } 1307 }
1298} 1308}
@@ -1340,10 +1350,12 @@ err_alloc:
1340static void ipgre_exit_net(struct net *net) 1350static void ipgre_exit_net(struct net *net)
1341{ 1351{
1342 struct ipgre_net *ign; 1352 struct ipgre_net *ign;
1353 LIST_HEAD(list);
1343 1354
1344 ign = net_generic(net, ipgre_net_id); 1355 ign = net_generic(net, ipgre_net_id);
1345 rtnl_lock(); 1356 rtnl_lock();
1346 ipgre_destroy_tunnels(ign); 1357 ipgre_destroy_tunnels(ign, &list);
1358 unregister_netdevice_many(&list);
1347 rtnl_unlock(); 1359 rtnl_unlock();
1348 kfree(ign); 1360 kfree(ign);
1349} 1361}
@@ -1471,7 +1483,7 @@ static void ipgre_tap_setup(struct net_device *dev)
1471 dev->features |= NETIF_F_NETNS_LOCAL; 1483 dev->features |= NETIF_F_NETNS_LOCAL;
1472} 1484}
1473 1485
1474static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[], 1486static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
1475 struct nlattr *data[]) 1487 struct nlattr *data[])
1476{ 1488{
1477 struct ip_tunnel *nt; 1489 struct ip_tunnel *nt;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 6c98b43badf4..fdf51badc8e5 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -161,7 +161,7 @@ int ip_call_ra_chain(struct sk_buff *skb)
161 /* If socket is bound to an interface, only report 161 /* If socket is bound to an interface, only report
162 * the packet if it came from that interface. 162 * the packet if it came from that interface.
163 */ 163 */
164 if (sk && inet_sk(sk)->num == protocol && 164 if (sk && inet_sk(sk)->inet_num == protocol &&
165 (!sk->sk_bound_dev_if || 165 (!sk->sk_bound_dev_if ||
166 sk->sk_bound_dev_if == dev->ifindex) && 166 sk->sk_bound_dev_if == dev->ifindex) &&
167 sock_net(sk) == dev_net(dev)) { 167 sock_net(sk) == dev_net(dev)) {
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index f9895180f481..322b40864ac0 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -329,7 +329,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
329 __be32 daddr; 329 __be32 daddr;
330 330
331 /* Use correct destination address if we have options. */ 331 /* Use correct destination address if we have options. */
332 daddr = inet->daddr; 332 daddr = inet->inet_daddr;
333 if(opt && opt->srr) 333 if(opt && opt->srr)
334 daddr = opt->faddr; 334 daddr = opt->faddr;
335 335
@@ -338,13 +338,13 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
338 .mark = sk->sk_mark, 338 .mark = sk->sk_mark,
339 .nl_u = { .ip4_u = 339 .nl_u = { .ip4_u =
340 { .daddr = daddr, 340 { .daddr = daddr,
341 .saddr = inet->saddr, 341 .saddr = inet->inet_saddr,
342 .tos = RT_CONN_FLAGS(sk) } }, 342 .tos = RT_CONN_FLAGS(sk) } },
343 .proto = sk->sk_protocol, 343 .proto = sk->sk_protocol,
344 .flags = inet_sk_flowi_flags(sk), 344 .flags = inet_sk_flowi_flags(sk),
345 .uli_u = { .ports = 345 .uli_u = { .ports =
346 { .sport = inet->sport, 346 { .sport = inet->inet_sport,
347 .dport = inet->dport } } }; 347 .dport = inet->inet_dport } } };
348 348
349 /* If this fails, retransmit mechanism of transport layer will 349 /* If this fails, retransmit mechanism of transport layer will
350 * keep trying until route appears or the connection times 350 * keep trying until route appears or the connection times
@@ -379,7 +379,7 @@ packet_routed:
379 379
380 if (opt && opt->optlen) { 380 if (opt && opt->optlen) {
381 iph->ihl += opt->optlen >> 2; 381 iph->ihl += opt->optlen >> 2;
382 ip_options_build(skb, opt, inet->daddr, rt, 0); 382 ip_options_build(skb, opt, inet->inet_daddr, rt, 0);
383 } 383 }
384 384
385 ip_select_ident_more(iph, &rt->u.dst, sk, 385 ip_select_ident_more(iph, &rt->u.dst, sk,
@@ -846,7 +846,8 @@ int ip_append_data(struct sock *sk,
846 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; 846 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
847 847
848 if (inet->cork.length + length > 0xFFFF - fragheaderlen) { 848 if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
849 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu-exthdrlen); 849 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport,
850 mtu-exthdrlen);
850 return -EMSGSIZE; 851 return -EMSGSIZE;
851 } 852 }
852 853
@@ -1100,7 +1101,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
1100 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; 1101 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
1101 1102
1102 if (inet->cork.length + size > 0xFFFF - fragheaderlen) { 1103 if (inet->cork.length + size > 0xFFFF - fragheaderlen) {
1103 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu); 1104 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, mtu);
1104 return -EMSGSIZE; 1105 return -EMSGSIZE;
1105 } 1106 }
1106 1107
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index e982b5c1ee17..cafad9baff03 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -245,7 +245,7 @@ int ip_ra_control(struct sock *sk, unsigned char on,
245{ 245{
246 struct ip_ra_chain *ra, *new_ra, **rap; 246 struct ip_ra_chain *ra, *new_ra, **rap;
247 247
248 if (sk->sk_type != SOCK_RAW || inet_sk(sk)->num == IPPROTO_RAW) 248 if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
249 return -EINVAL; 249 return -EINVAL;
250 250
251 new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; 251 new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
@@ -480,7 +480,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
480 case IP_OPTIONS: 480 case IP_OPTIONS:
481 { 481 {
482 struct ip_options *opt = NULL; 482 struct ip_options *opt = NULL;
483 if (optlen > 40 || optlen < 0) 483 if (optlen > 40)
484 goto e_inval; 484 goto e_inval;
485 err = ip_options_get_from_user(sock_net(sk), &opt, 485 err = ip_options_get_from_user(sock_net(sk), &opt,
486 optval, optlen); 486 optval, optlen);
@@ -492,7 +492,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
492 if (sk->sk_family == PF_INET || 492 if (sk->sk_family == PF_INET ||
493 (!((1 << sk->sk_state) & 493 (!((1 << sk->sk_state) &
494 (TCPF_LISTEN | TCPF_CLOSE)) && 494 (TCPF_LISTEN | TCPF_CLOSE)) &&
495 inet->daddr != LOOPBACK4_IPV6)) { 495 inet->inet_daddr != LOOPBACK4_IPV6)) {
496#endif 496#endif
497 if (inet->opt) 497 if (inet->opt)
498 icsk->icsk_ext_hdr_len -= inet->opt->optlen; 498 icsk->icsk_ext_hdr_len -= inet->opt->optlen;
@@ -575,7 +575,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
575 inet->hdrincl = val ? 1 : 0; 575 inet->hdrincl = val ? 1 : 0;
576 break; 576 break;
577 case IP_MTU_DISCOVER: 577 case IP_MTU_DISCOVER:
578 if (val < 0 || val > 3) 578 if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE)
579 goto e_inval; 579 goto e_inval;
580 inet->pmtudisc = val; 580 inet->pmtudisc = val;
581 break; 581 break;
@@ -1180,8 +1180,8 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
1180 if (inet->cmsg_flags & IP_CMSG_PKTINFO) { 1180 if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
1181 struct in_pktinfo info; 1181 struct in_pktinfo info;
1182 1182
1183 info.ipi_addr.s_addr = inet->rcv_saddr; 1183 info.ipi_addr.s_addr = inet->inet_rcv_saddr;
1184 info.ipi_spec_dst.s_addr = inet->rcv_saddr; 1184 info.ipi_spec_dst.s_addr = inet->inet_rcv_saddr;
1185 info.ipi_ifindex = inet->mc_index; 1185 info.ipi_ifindex = inet->mc_index;
1186 put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); 1186 put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
1187 } 1187 }
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index ae40ed1ba560..c5b1f71c3cd8 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -134,7 +134,13 @@ static void ipip_fb_tunnel_init(struct net_device *dev);
134static void ipip_tunnel_init(struct net_device *dev); 134static void ipip_tunnel_init(struct net_device *dev);
135static void ipip_tunnel_setup(struct net_device *dev); 135static void ipip_tunnel_setup(struct net_device *dev);
136 136
137static DEFINE_RWLOCK(ipip_lock); 137/*
138 * Locking : hash tables are protected by RCU and a spinlock
139 */
140static DEFINE_SPINLOCK(ipip_lock);
141
142#define for_each_ip_tunnel_rcu(start) \
143 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
138 144
139static struct ip_tunnel * ipip_tunnel_lookup(struct net *net, 145static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
140 __be32 remote, __be32 local) 146 __be32 remote, __be32 local)
@@ -144,20 +150,21 @@ static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
144 struct ip_tunnel *t; 150 struct ip_tunnel *t;
145 struct ipip_net *ipn = net_generic(net, ipip_net_id); 151 struct ipip_net *ipn = net_generic(net, ipip_net_id);
146 152
147 for (t = ipn->tunnels_r_l[h0^h1]; t; t = t->next) { 153 for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
148 if (local == t->parms.iph.saddr && 154 if (local == t->parms.iph.saddr &&
149 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) 155 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
150 return t; 156 return t;
151 } 157
152 for (t = ipn->tunnels_r[h0]; t; t = t->next) { 158 for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
153 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) 159 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
154 return t; 160 return t;
155 } 161
156 for (t = ipn->tunnels_l[h1]; t; t = t->next) { 162 for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
157 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) 163 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
158 return t; 164 return t;
159 } 165
160 if ((t = ipn->tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP)) 166 t = rcu_dereference(ipn->tunnels_wc[0]);
167 if (t && (t->dev->flags&IFF_UP))
161 return t; 168 return t;
162 return NULL; 169 return NULL;
163} 170}
@@ -193,9 +200,9 @@ static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
193 200
194 for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) { 201 for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) {
195 if (t == *tp) { 202 if (t == *tp) {
196 write_lock_bh(&ipip_lock); 203 spin_lock_bh(&ipip_lock);
197 *tp = t->next; 204 *tp = t->next;
198 write_unlock_bh(&ipip_lock); 205 spin_unlock_bh(&ipip_lock);
199 break; 206 break;
200 } 207 }
201 } 208 }
@@ -205,10 +212,10 @@ static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
205{ 212{
206 struct ip_tunnel **tp = ipip_bucket(ipn, t); 213 struct ip_tunnel **tp = ipip_bucket(ipn, t);
207 214
215 spin_lock_bh(&ipip_lock);
208 t->next = *tp; 216 t->next = *tp;
209 write_lock_bh(&ipip_lock); 217 rcu_assign_pointer(*tp, t);
210 *tp = t; 218 spin_unlock_bh(&ipip_lock);
211 write_unlock_bh(&ipip_lock);
212} 219}
213 220
214static struct ip_tunnel * ipip_tunnel_locate(struct net *net, 221static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
@@ -267,9 +274,9 @@ static void ipip_tunnel_uninit(struct net_device *dev)
267 struct ipip_net *ipn = net_generic(net, ipip_net_id); 274 struct ipip_net *ipn = net_generic(net, ipip_net_id);
268 275
269 if (dev == ipn->fb_tunnel_dev) { 276 if (dev == ipn->fb_tunnel_dev) {
270 write_lock_bh(&ipip_lock); 277 spin_lock_bh(&ipip_lock);
271 ipn->tunnels_wc[0] = NULL; 278 ipn->tunnels_wc[0] = NULL;
272 write_unlock_bh(&ipip_lock); 279 spin_unlock_bh(&ipip_lock);
273 } else 280 } else
274 ipip_tunnel_unlink(ipn, netdev_priv(dev)); 281 ipip_tunnel_unlink(ipn, netdev_priv(dev));
275 dev_put(dev); 282 dev_put(dev);
@@ -318,7 +325,7 @@ static int ipip_err(struct sk_buff *skb, u32 info)
318 325
319 err = -ENOENT; 326 err = -ENOENT;
320 327
321 read_lock(&ipip_lock); 328 rcu_read_lock();
322 t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); 329 t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
323 if (t == NULL || t->parms.iph.daddr == 0) 330 if (t == NULL || t->parms.iph.daddr == 0)
324 goto out; 331 goto out;
@@ -333,7 +340,7 @@ static int ipip_err(struct sk_buff *skb, u32 info)
333 t->err_count = 1; 340 t->err_count = 1;
334 t->err_time = jiffies; 341 t->err_time = jiffies;
335out: 342out:
336 read_unlock(&ipip_lock); 343 rcu_read_unlock();
337 return err; 344 return err;
338} 345}
339 346
@@ -351,11 +358,11 @@ static int ipip_rcv(struct sk_buff *skb)
351 struct ip_tunnel *tunnel; 358 struct ip_tunnel *tunnel;
352 const struct iphdr *iph = ip_hdr(skb); 359 const struct iphdr *iph = ip_hdr(skb);
353 360
354 read_lock(&ipip_lock); 361 rcu_read_lock();
355 if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev), 362 if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev),
356 iph->saddr, iph->daddr)) != NULL) { 363 iph->saddr, iph->daddr)) != NULL) {
357 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 364 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
358 read_unlock(&ipip_lock); 365 rcu_read_unlock();
359 kfree_skb(skb); 366 kfree_skb(skb);
360 return 0; 367 return 0;
361 } 368 }
@@ -374,10 +381,10 @@ static int ipip_rcv(struct sk_buff *skb)
374 nf_reset(skb); 381 nf_reset(skb);
375 ipip_ecn_decapsulate(iph, skb); 382 ipip_ecn_decapsulate(iph, skb);
376 netif_rx(skb); 383 netif_rx(skb);
377 read_unlock(&ipip_lock); 384 rcu_read_unlock();
378 return 0; 385 return 0;
379 } 386 }
380 read_unlock(&ipip_lock); 387 rcu_read_unlock();
381 388
382 return -1; 389 return -1;
383} 390}
@@ -390,7 +397,8 @@ static int ipip_rcv(struct sk_buff *skb)
390static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 397static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
391{ 398{
392 struct ip_tunnel *tunnel = netdev_priv(dev); 399 struct ip_tunnel *tunnel = netdev_priv(dev);
393 struct net_device_stats *stats = &tunnel->dev->stats; 400 struct net_device_stats *stats = &dev->stats;
401 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
394 struct iphdr *tiph = &tunnel->parms.iph; 402 struct iphdr *tiph = &tunnel->parms.iph;
395 u8 tos = tunnel->parms.iph.tos; 403 u8 tos = tunnel->parms.iph.tos;
396 __be16 df = tiph->frag_off; 404 __be16 df = tiph->frag_off;
@@ -480,7 +488,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
480 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); 488 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
481 if (!new_skb) { 489 if (!new_skb) {
482 ip_rt_put(rt); 490 ip_rt_put(rt);
483 stats->tx_dropped++; 491 txq->tx_dropped++;
484 dev_kfree_skb(skb); 492 dev_kfree_skb(skb);
485 return NETDEV_TX_OK; 493 return NETDEV_TX_OK;
486 } 494 }
@@ -748,16 +756,19 @@ static struct xfrm_tunnel ipip_handler = {
748static const char banner[] __initconst = 756static const char banner[] __initconst =
749 KERN_INFO "IPv4 over IPv4 tunneling driver\n"; 757 KERN_INFO "IPv4 over IPv4 tunneling driver\n";
750 758
751static void ipip_destroy_tunnels(struct ipip_net *ipn) 759static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
752{ 760{
753 int prio; 761 int prio;
754 762
755 for (prio = 1; prio < 4; prio++) { 763 for (prio = 1; prio < 4; prio++) {
756 int h; 764 int h;
757 for (h = 0; h < HASH_SIZE; h++) { 765 for (h = 0; h < HASH_SIZE; h++) {
758 struct ip_tunnel *t; 766 struct ip_tunnel *t = ipn->tunnels[prio][h];
759 while ((t = ipn->tunnels[prio][h]) != NULL) 767
760 unregister_netdevice(t->dev); 768 while (t != NULL) {
769 unregister_netdevice_queue(t->dev, head);
770 t = t->next;
771 }
761 } 772 }
762 } 773 }
763} 774}
@@ -810,11 +821,13 @@ err_alloc:
810static void ipip_exit_net(struct net *net) 821static void ipip_exit_net(struct net *net)
811{ 822{
812 struct ipip_net *ipn; 823 struct ipip_net *ipn;
824 LIST_HEAD(list);
813 825
814 ipn = net_generic(net, ipip_net_id); 826 ipn = net_generic(net, ipip_net_id);
815 rtnl_lock(); 827 rtnl_lock();
816 ipip_destroy_tunnels(ipn); 828 ipip_destroy_tunnels(ipn, &list);
817 unregister_netdevice(ipn->fb_tunnel_dev); 829 unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
830 unregister_netdevice_many(&list);
818 rtnl_unlock(); 831 rtnl_unlock();
819 kfree(ipn); 832 kfree(ipn);
820} 833}
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 99508d66a642..54596f73eff5 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -275,7 +275,8 @@ failure:
275 * @notify: Set to 1, if the caller is a notifier_call 275 * @notify: Set to 1, if the caller is a notifier_call
276 */ 276 */
277 277
278static int vif_delete(struct net *net, int vifi, int notify) 278static int vif_delete(struct net *net, int vifi, int notify,
279 struct list_head *head)
279{ 280{
280 struct vif_device *v; 281 struct vif_device *v;
281 struct net_device *dev; 282 struct net_device *dev;
@@ -319,7 +320,7 @@ static int vif_delete(struct net *net, int vifi, int notify)
319 } 320 }
320 321
321 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify) 322 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
322 unregister_netdevice(dev); 323 unregister_netdevice_queue(dev, head);
323 324
324 dev_put(dev); 325 dev_put(dev);
325 return 0; 326 return 0;
@@ -469,8 +470,18 @@ static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
469 return err; 470 return err;
470 } 471 }
471 break; 472 break;
473
474 case VIFF_USE_IFINDEX:
472 case 0: 475 case 0:
473 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr); 476 if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
477 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
478 if (dev && dev->ip_ptr == NULL) {
479 dev_put(dev);
480 return -EADDRNOTAVAIL;
481 }
482 } else
483 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
484
474 if (!dev) 485 if (!dev)
475 return -EADDRNOTAVAIL; 486 return -EADDRNOTAVAIL;
476 err = dev_set_allmulti(dev, 1); 487 err = dev_set_allmulti(dev, 1);
@@ -862,14 +873,16 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
862static void mroute_clean_tables(struct net *net) 873static void mroute_clean_tables(struct net *net)
863{ 874{
864 int i; 875 int i;
876 LIST_HEAD(list);
865 877
866 /* 878 /*
867 * Shut down all active vif entries 879 * Shut down all active vif entries
868 */ 880 */
869 for (i = 0; i < net->ipv4.maxvif; i++) { 881 for (i = 0; i < net->ipv4.maxvif; i++) {
870 if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC)) 882 if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
871 vif_delete(net, i, 0); 883 vif_delete(net, i, 0, &list);
872 } 884 }
885 unregister_netdevice_many(&list);
873 886
874 /* 887 /*
875 * Wipe the cache 888 * Wipe the cache
@@ -948,7 +961,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
948 switch (optname) { 961 switch (optname) {
949 case MRT_INIT: 962 case MRT_INIT:
950 if (sk->sk_type != SOCK_RAW || 963 if (sk->sk_type != SOCK_RAW ||
951 inet_sk(sk)->num != IPPROTO_IGMP) 964 inet_sk(sk)->inet_num != IPPROTO_IGMP)
952 return -EOPNOTSUPP; 965 return -EOPNOTSUPP;
953 if (optlen != sizeof(int)) 966 if (optlen != sizeof(int))
954 return -ENOPROTOOPT; 967 return -ENOPROTOOPT;
@@ -985,7 +998,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
985 if (optname == MRT_ADD_VIF) { 998 if (optname == MRT_ADD_VIF) {
986 ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk); 999 ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
987 } else { 1000 } else {
988 ret = vif_delete(net, vif.vifc_vifi, 0); 1001 ret = vif_delete(net, vif.vifc_vifi, 0, NULL);
989 } 1002 }
990 rtnl_unlock(); 1003 rtnl_unlock();
991 return ret; 1004 return ret;
@@ -1148,6 +1161,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
1148 struct net *net = dev_net(dev); 1161 struct net *net = dev_net(dev);
1149 struct vif_device *v; 1162 struct vif_device *v;
1150 int ct; 1163 int ct;
1164 LIST_HEAD(list);
1151 1165
1152 if (!net_eq(dev_net(dev), net)) 1166 if (!net_eq(dev_net(dev), net))
1153 return NOTIFY_DONE; 1167 return NOTIFY_DONE;
@@ -1157,8 +1171,9 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
1157 v = &net->ipv4.vif_table[0]; 1171 v = &net->ipv4.vif_table[0];
1158 for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) { 1172 for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
1159 if (v->dev == dev) 1173 if (v->dev == dev)
1160 vif_delete(net, ct, 1); 1174 vif_delete(net, ct, 1, &list);
1161 } 1175 }
1176 unregister_netdevice_many(&list);
1162 return NOTIFY_DONE; 1177 return NOTIFY_DONE;
1163} 1178}
1164 1179
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index aa95bb82ee6c..9cd423ffafa8 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -255,10 +255,10 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
255 struct nf_conntrack_tuple tuple; 255 struct nf_conntrack_tuple tuple;
256 256
257 memset(&tuple, 0, sizeof(tuple)); 257 memset(&tuple, 0, sizeof(tuple));
258 tuple.src.u3.ip = inet->rcv_saddr; 258 tuple.src.u3.ip = inet->inet_rcv_saddr;
259 tuple.src.u.tcp.port = inet->sport; 259 tuple.src.u.tcp.port = inet->inet_sport;
260 tuple.dst.u3.ip = inet->daddr; 260 tuple.dst.u3.ip = inet->inet_daddr;
261 tuple.dst.u.tcp.port = inet->dport; 261 tuple.dst.u.tcp.port = inet->inet_dport;
262 tuple.src.l3num = PF_INET; 262 tuple.src.l3num = PF_INET;
263 tuple.dst.protonum = sk->sk_protocol; 263 tuple.dst.protonum = sk->sk_protocol;
264 264
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index ab996f9c0fe0..ce154b47f1da 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -87,7 +87,7 @@ void raw_hash_sk(struct sock *sk)
87 struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; 87 struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
88 struct hlist_head *head; 88 struct hlist_head *head;
89 89
90 head = &h->ht[inet_sk(sk)->num & (RAW_HTABLE_SIZE - 1)]; 90 head = &h->ht[inet_sk(sk)->inet_num & (RAW_HTABLE_SIZE - 1)];
91 91
92 write_lock_bh(&h->lock); 92 write_lock_bh(&h->lock);
93 sk_add_node(sk, head); 93 sk_add_node(sk, head);
@@ -115,9 +115,9 @@ static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
115 sk_for_each_from(sk, node) { 115 sk_for_each_from(sk, node) {
116 struct inet_sock *inet = inet_sk(sk); 116 struct inet_sock *inet = inet_sk(sk);
117 117
118 if (net_eq(sock_net(sk), net) && inet->num == num && 118 if (net_eq(sock_net(sk), net) && inet->inet_num == num &&
119 !(inet->daddr && inet->daddr != raddr) && 119 !(inet->inet_daddr && inet->inet_daddr != raddr) &&
120 !(inet->rcv_saddr && inet->rcv_saddr != laddr) && 120 !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
121 !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) 121 !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
122 goto found; /* gotcha */ 122 goto found; /* gotcha */
123 } 123 }
@@ -292,7 +292,6 @@ static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb)
292 /* Charge it to the socket. */ 292 /* Charge it to the socket. */
293 293
294 if (sock_queue_rcv_skb(sk, skb) < 0) { 294 if (sock_queue_rcv_skb(sk, skb) < 0) {
295 atomic_inc(&sk->sk_drops);
296 kfree_skb(skb); 295 kfree_skb(skb);
297 return NET_RX_DROP; 296 return NET_RX_DROP;
298 } 297 }
@@ -327,7 +326,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
327 int err; 326 int err;
328 327
329 if (length > rt->u.dst.dev->mtu) { 328 if (length > rt->u.dst.dev->mtu) {
330 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, 329 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport,
331 rt->u.dst.dev->mtu); 330 rt->u.dst.dev->mtu);
332 return -EMSGSIZE; 331 return -EMSGSIZE;
333 } 332 }
@@ -500,10 +499,10 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
500 err = -EDESTADDRREQ; 499 err = -EDESTADDRREQ;
501 if (sk->sk_state != TCP_ESTABLISHED) 500 if (sk->sk_state != TCP_ESTABLISHED)
502 goto out; 501 goto out;
503 daddr = inet->daddr; 502 daddr = inet->inet_daddr;
504 } 503 }
505 504
506 ipc.addr = inet->saddr; 505 ipc.addr = inet->inet_saddr;
507 ipc.opt = NULL; 506 ipc.opt = NULL;
508 ipc.shtx.flags = 0; 507 ipc.shtx.flags = 0;
509 ipc.oif = sk->sk_bound_dev_if; 508 ipc.oif = sk->sk_bound_dev_if;
@@ -645,9 +644,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
645 if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL && 644 if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
646 chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) 645 chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
647 goto out; 646 goto out;
648 inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr; 647 inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;
649 if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) 648 if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
650 inet->saddr = 0; /* Use device */ 649 inet->inet_saddr = 0; /* Use device */
651 sk_dst_reset(sk); 650 sk_dst_reset(sk);
652 ret = 0; 651 ret = 0;
653out: return ret; 652out: return ret;
@@ -692,7 +691,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
692 if (err) 691 if (err)
693 goto done; 692 goto done;
694 693
695 sock_recv_timestamp(msg, sk, skb); 694 sock_recv_ts_and_drops(msg, sk, skb);
696 695
697 /* Copy the address. */ 696 /* Copy the address. */
698 if (sin) { 697 if (sin) {
@@ -717,7 +716,7 @@ static int raw_init(struct sock *sk)
717{ 716{
718 struct raw_sock *rp = raw_sk(sk); 717 struct raw_sock *rp = raw_sk(sk);
719 718
720 if (inet_sk(sk)->num == IPPROTO_ICMP) 719 if (inet_sk(sk)->inet_num == IPPROTO_ICMP)
721 memset(&rp->filter, 0, sizeof(rp->filter)); 720 memset(&rp->filter, 0, sizeof(rp->filter));
722 return 0; 721 return 0;
723} 722}
@@ -754,7 +753,7 @@ static int do_raw_setsockopt(struct sock *sk, int level, int optname,
754 char __user *optval, unsigned int optlen) 753 char __user *optval, unsigned int optlen)
755{ 754{
756 if (optname == ICMP_FILTER) { 755 if (optname == ICMP_FILTER) {
757 if (inet_sk(sk)->num != IPPROTO_ICMP) 756 if (inet_sk(sk)->inet_num != IPPROTO_ICMP)
758 return -EOPNOTSUPP; 757 return -EOPNOTSUPP;
759 else 758 else
760 return raw_seticmpfilter(sk, optval, optlen); 759 return raw_seticmpfilter(sk, optval, optlen);
@@ -784,7 +783,7 @@ static int do_raw_getsockopt(struct sock *sk, int level, int optname,
784 char __user *optval, int __user *optlen) 783 char __user *optval, int __user *optlen)
785{ 784{
786 if (optname == ICMP_FILTER) { 785 if (optname == ICMP_FILTER) {
787 if (inet_sk(sk)->num != IPPROTO_ICMP) 786 if (inet_sk(sk)->inet_num != IPPROTO_ICMP)
788 return -EOPNOTSUPP; 787 return -EOPNOTSUPP;
789 else 788 else
790 return raw_geticmpfilter(sk, optval, optlen); 789 return raw_geticmpfilter(sk, optval, optlen);
@@ -943,10 +942,10 @@ EXPORT_SYMBOL_GPL(raw_seq_stop);
943static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) 942static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
944{ 943{
945 struct inet_sock *inet = inet_sk(sp); 944 struct inet_sock *inet = inet_sk(sp);
946 __be32 dest = inet->daddr, 945 __be32 dest = inet->inet_daddr,
947 src = inet->rcv_saddr; 946 src = inet->inet_rcv_saddr;
948 __u16 destp = 0, 947 __u16 destp = 0,
949 srcp = inet->num; 948 srcp = inet->inet_num;
950 949
951 seq_printf(seq, "%4d: %08X:%04X %08X:%04X" 950 seq_printf(seq, "%4d: %08X:%04X %08X:%04X"
952 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", 951 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n",
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 5b1050a5d874..4284ceef7945 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1628,9 +1628,6 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
1628 __be32 daddr = iph->daddr; 1628 __be32 daddr = iph->daddr;
1629 unsigned short est_mtu = 0; 1629 unsigned short est_mtu = 0;
1630 1630
1631 if (ipv4_config.no_pmtu_disc)
1632 return 0;
1633
1634 for (k = 0; k < 2; k++) { 1631 for (k = 0; k < 2; k++) {
1635 for (i = 0; i < 2; i++) { 1632 for (i = 0; i < 2; i++) {
1636 unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], 1633 unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
@@ -2855,7 +2852,7 @@ static int rt_fill_info(struct net *net,
2855 error = rt->u.dst.error; 2852 error = rt->u.dst.error;
2856 expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0; 2853 expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0;
2857 if (rt->peer) { 2854 if (rt->peer) {
2858 id = rt->peer->ip_id_count; 2855 id = atomic_read(&rt->peer->ip_id_count) & 0xffff;
2859 if (rt->peer->tcp_ts_stamp) { 2856 if (rt->peer->tcp_ts_stamp) {
2860 ts = rt->peer->tcp_ts; 2857 ts = rt->peer->tcp_ts;
2861 tsage = get_seconds() - rt->peer->tcp_ts_stamp; 2858 tsage = get_seconds() - rt->peer->tcp_ts_stamp;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index a6e0e077ac33..3146cc401748 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -276,13 +276,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
276 276
277 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); 277 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
278 278
279 /* check for timestamp cookie support */
280 memset(&tcp_opt, 0, sizeof(tcp_opt));
281 tcp_parse_options(skb, &tcp_opt, 0);
282
283 if (tcp_opt.saw_tstamp)
284 cookie_check_timestamp(&tcp_opt);
285
286 ret = NULL; 279 ret = NULL;
287 req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */ 280 req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */
288 if (!req) 281 if (!req)
@@ -298,12 +291,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
298 ireq->loc_addr = ip_hdr(skb)->daddr; 291 ireq->loc_addr = ip_hdr(skb)->daddr;
299 ireq->rmt_addr = ip_hdr(skb)->saddr; 292 ireq->rmt_addr = ip_hdr(skb)->saddr;
300 ireq->ecn_ok = 0; 293 ireq->ecn_ok = 0;
301 ireq->snd_wscale = tcp_opt.snd_wscale;
302 ireq->rcv_wscale = tcp_opt.rcv_wscale;
303 ireq->sack_ok = tcp_opt.sack_ok;
304 ireq->wscale_ok = tcp_opt.wscale_ok;
305 ireq->tstamp_ok = tcp_opt.saw_tstamp;
306 req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
307 294
308 /* We throwed the options of the initial SYN away, so we hope 295 /* We throwed the options of the initial SYN away, so we hope
309 * the ACK carries the same options again (see RFC1122 4.2.3.8) 296 * the ACK carries the same options again (see RFC1122 4.2.3.8)
@@ -333,7 +320,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
333 * no easy way to do this. 320 * no easy way to do this.
334 */ 321 */
335 { 322 {
336 struct flowi fl = { .nl_u = { .ip4_u = 323 struct flowi fl = { .mark = sk->sk_mark,
324 .nl_u = { .ip4_u =
337 { .daddr = ((opt && opt->srr) ? 325 { .daddr = ((opt && opt->srr) ?
338 opt->faddr : 326 opt->faddr :
339 ireq->rmt_addr), 327 ireq->rmt_addr),
@@ -351,6 +339,20 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
351 } 339 }
352 } 340 }
353 341
342 /* check for timestamp cookie support */
343 memset(&tcp_opt, 0, sizeof(tcp_opt));
344 tcp_parse_options(skb, &tcp_opt, 0, &rt->u.dst);
345
346 if (tcp_opt.saw_tstamp)
347 cookie_check_timestamp(&tcp_opt);
348
349 ireq->snd_wscale = tcp_opt.snd_wscale;
350 ireq->rcv_wscale = tcp_opt.rcv_wscale;
351 ireq->sack_ok = tcp_opt.sack_ok;
352 ireq->wscale_ok = tcp_opt.wscale_ok;
353 ireq->tstamp_ok = tcp_opt.saw_tstamp;
354 req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
355
354 /* Try to redo what tcp_v4_send_synack did. */ 356 /* Try to redo what tcp_v4_send_synack did. */
355 req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW); 357 req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW);
356 358
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f1813bc71088..524f9760193b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2042,7 +2042,7 @@ int tcp_disconnect(struct sock *sk, int flags)
2042 __skb_queue_purge(&sk->sk_async_wait_queue); 2042 __skb_queue_purge(&sk->sk_async_wait_queue);
2043#endif 2043#endif
2044 2044
2045 inet->dport = 0; 2045 inet->inet_dport = 0;
2046 2046
2047 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) 2047 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
2048 inet_reset_saddr(sk); 2048 inet_reset_saddr(sk);
@@ -2066,7 +2066,7 @@ int tcp_disconnect(struct sock *sk, int flags)
2066 memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); 2066 memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
2067 __sk_dst_reset(sk); 2067 __sk_dst_reset(sk);
2068 2068
2069 WARN_ON(inet->num && !icsk->icsk_bind_hash); 2069 WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
2070 2070
2071 sk->sk_error_report(sk); 2071 sk->sk_error_report(sk);
2072 return err; 2072 return err;
@@ -2903,11 +2903,10 @@ void __init tcp_init(void)
2903 (totalram_pages >= 128 * 1024) ? 2903 (totalram_pages >= 128 * 1024) ?
2904 13 : 15, 2904 13 : 15,
2905 0, 2905 0,
2906 &tcp_hashinfo.ehash_size,
2907 NULL, 2906 NULL,
2907 &tcp_hashinfo.ehash_mask,
2908 thash_entries ? 0 : 512 * 1024); 2908 thash_entries ? 0 : 512 * 1024);
2909 tcp_hashinfo.ehash_size = 1 << tcp_hashinfo.ehash_size; 2909 for (i = 0; i <= tcp_hashinfo.ehash_mask; i++) {
2910 for (i = 0; i < tcp_hashinfo.ehash_size; i++) {
2911 INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i); 2910 INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i);
2912 INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].twchain, i); 2911 INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].twchain, i);
2913 } 2912 }
@@ -2916,7 +2915,7 @@ void __init tcp_init(void)
2916 tcp_hashinfo.bhash = 2915 tcp_hashinfo.bhash =
2917 alloc_large_system_hash("TCP bind", 2916 alloc_large_system_hash("TCP bind",
2918 sizeof(struct inet_bind_hashbucket), 2917 sizeof(struct inet_bind_hashbucket),
2919 tcp_hashinfo.ehash_size, 2918 tcp_hashinfo.ehash_mask + 1,
2920 (totalram_pages >= 128 * 1024) ? 2919 (totalram_pages >= 128 * 1024) ?
2921 13 : 15, 2920 13 : 15,
2922 0, 2921 0,
@@ -2971,8 +2970,8 @@ void __init tcp_init(void)
2971 sysctl_tcp_rmem[2] = max(87380, max_share); 2970 sysctl_tcp_rmem[2] = max(87380, max_share);
2972 2971
2973 printk(KERN_INFO "TCP: Hash tables configured " 2972 printk(KERN_INFO "TCP: Hash tables configured "
2974 "(established %d bind %d)\n", 2973 "(established %u bind %u)\n",
2975 tcp_hashinfo.ehash_size, tcp_hashinfo.bhash_size); 2974 tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
2976 2975
2977 tcp_register_congestion_control(&tcp_reno); 2976 tcp_register_congestion_control(&tcp_reno);
2978} 2977}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d86784be7ab3..cc306ac6eb51 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -140,7 +140,7 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
140 * "len" is invariant segment length, including TCP header. 140 * "len" is invariant segment length, including TCP header.
141 */ 141 */
142 len += skb->data - skb_transport_header(skb); 142 len += skb->data - skb_transport_header(skb);
143 if (len >= TCP_MIN_RCVMSS + sizeof(struct tcphdr) || 143 if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) ||
144 /* If PSH is not set, packet should be 144 /* If PSH is not set, packet should be
145 * full sized, provided peer TCP is not badly broken. 145 * full sized, provided peer TCP is not badly broken.
146 * This observation (if it is correct 8)) allows 146 * This observation (if it is correct 8)) allows
@@ -411,7 +411,7 @@ void tcp_initialize_rcv_mss(struct sock *sk)
411 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache); 411 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
412 412
413 hint = min(hint, tp->rcv_wnd / 2); 413 hint = min(hint, tp->rcv_wnd / 2);
414 hint = min(hint, TCP_MIN_RCVMSS); 414 hint = min(hint, TCP_MSS_DEFAULT);
415 hint = max(hint, TCP_MIN_MSS); 415 hint = max(hint, TCP_MIN_MSS);
416 416
417 inet_csk(sk)->icsk_ack.rcv_mss = hint; 417 inet_csk(sk)->icsk_ack.rcv_mss = hint;
@@ -2300,7 +2300,7 @@ static inline int tcp_fackets_out(struct tcp_sock *tp)
2300 * they differ. Since neither occurs due to loss, TCP should really 2300 * they differ. Since neither occurs due to loss, TCP should really
2301 * ignore them. 2301 * ignore them.
2302 */ 2302 */
2303static inline int tcp_dupack_heurestics(struct tcp_sock *tp) 2303static inline int tcp_dupack_heuristics(struct tcp_sock *tp)
2304{ 2304{
2305 return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; 2305 return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
2306} 2306}
@@ -2425,7 +2425,7 @@ static int tcp_time_to_recover(struct sock *sk)
2425 return 1; 2425 return 1;
2426 2426
2427 /* Not-A-Trick#2 : Classic rule... */ 2427 /* Not-A-Trick#2 : Classic rule... */
2428 if (tcp_dupack_heurestics(tp) > tp->reordering) 2428 if (tcp_dupack_heuristics(tp) > tp->reordering)
2429 return 1; 2429 return 1;
2430 2430
2431 /* Trick#3 : when we use RFC2988 timer restart, fast 2431 /* Trick#3 : when we use RFC2988 timer restart, fast
@@ -3698,7 +3698,7 @@ old_ack:
3698 * the fast version below fails. 3698 * the fast version below fails.
3699 */ 3699 */
3700void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, 3700void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
3701 int estab) 3701 int estab, struct dst_entry *dst)
3702{ 3702{
3703 unsigned char *ptr; 3703 unsigned char *ptr;
3704 struct tcphdr *th = tcp_hdr(skb); 3704 struct tcphdr *th = tcp_hdr(skb);
@@ -3737,7 +3737,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
3737 break; 3737 break;
3738 case TCPOPT_WINDOW: 3738 case TCPOPT_WINDOW:
3739 if (opsize == TCPOLEN_WINDOW && th->syn && 3739 if (opsize == TCPOLEN_WINDOW && th->syn &&
3740 !estab && sysctl_tcp_window_scaling) { 3740 !estab && sysctl_tcp_window_scaling &&
3741 !dst_feature(dst, RTAX_FEATURE_NO_WSCALE)) {
3741 __u8 snd_wscale = *(__u8 *)ptr; 3742 __u8 snd_wscale = *(__u8 *)ptr;
3742 opt_rx->wscale_ok = 1; 3743 opt_rx->wscale_ok = 1;
3743 if (snd_wscale > 14) { 3744 if (snd_wscale > 14) {
@@ -3753,7 +3754,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
3753 case TCPOPT_TIMESTAMP: 3754 case TCPOPT_TIMESTAMP:
3754 if ((opsize == TCPOLEN_TIMESTAMP) && 3755 if ((opsize == TCPOLEN_TIMESTAMP) &&
3755 ((estab && opt_rx->tstamp_ok) || 3756 ((estab && opt_rx->tstamp_ok) ||
3756 (!estab && sysctl_tcp_timestamps))) { 3757 (!estab && sysctl_tcp_timestamps &&
3758 !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP)))) {
3757 opt_rx->saw_tstamp = 1; 3759 opt_rx->saw_tstamp = 1;
3758 opt_rx->rcv_tsval = get_unaligned_be32(ptr); 3760 opt_rx->rcv_tsval = get_unaligned_be32(ptr);
3759 opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4); 3761 opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
@@ -3761,7 +3763,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
3761 break; 3763 break;
3762 case TCPOPT_SACK_PERM: 3764 case TCPOPT_SACK_PERM:
3763 if (opsize == TCPOLEN_SACK_PERM && th->syn && 3765 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
3764 !estab && sysctl_tcp_sack) { 3766 !estab && sysctl_tcp_sack &&
3767 !dst_feature(dst, RTAX_FEATURE_NO_SACK)) {
3765 opt_rx->sack_ok = 1; 3768 opt_rx->sack_ok = 1;
3766 tcp_sack_reset(opt_rx); 3769 tcp_sack_reset(opt_rx);
3767 } 3770 }
@@ -3820,7 +3823,7 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
3820 if (tcp_parse_aligned_timestamp(tp, th)) 3823 if (tcp_parse_aligned_timestamp(tp, th))
3821 return 1; 3824 return 1;
3822 } 3825 }
3823 tcp_parse_options(skb, &tp->rx_opt, 1); 3826 tcp_parse_options(skb, &tp->rx_opt, 1, NULL);
3824 return 1; 3827 return 1;
3825} 3828}
3826 3829
@@ -4075,8 +4078,10 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
4075static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) 4078static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
4076{ 4079{
4077 struct tcp_sock *tp = tcp_sk(sk); 4080 struct tcp_sock *tp = tcp_sk(sk);
4081 struct dst_entry *dst = __sk_dst_get(sk);
4078 4082
4079 if (tcp_is_sack(tp) && sysctl_tcp_dsack) { 4083 if (tcp_is_sack(tp) && sysctl_tcp_dsack &&
4084 !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) {
4080 int mib_idx; 4085 int mib_idx;
4081 4086
4082 if (before(seq, tp->rcv_nxt)) 4087 if (before(seq, tp->rcv_nxt))
@@ -4105,13 +4110,15 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
4105static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb) 4110static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
4106{ 4111{
4107 struct tcp_sock *tp = tcp_sk(sk); 4112 struct tcp_sock *tp = tcp_sk(sk);
4113 struct dst_entry *dst = __sk_dst_get(sk);
4108 4114
4109 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && 4115 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
4110 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { 4116 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4111 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); 4117 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4112 tcp_enter_quickack_mode(sk); 4118 tcp_enter_quickack_mode(sk);
4113 4119
4114 if (tcp_is_sack(tp) && sysctl_tcp_dsack) { 4120 if (tcp_is_sack(tp) && sysctl_tcp_dsack &&
4121 !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) {
4115 u32 end_seq = TCP_SKB_CB(skb)->end_seq; 4122 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4116 4123
4117 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) 4124 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
@@ -5364,8 +5371,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5364 struct tcp_sock *tp = tcp_sk(sk); 5371 struct tcp_sock *tp = tcp_sk(sk);
5365 struct inet_connection_sock *icsk = inet_csk(sk); 5372 struct inet_connection_sock *icsk = inet_csk(sk);
5366 int saved_clamp = tp->rx_opt.mss_clamp; 5373 int saved_clamp = tp->rx_opt.mss_clamp;
5374 struct dst_entry *dst = __sk_dst_get(sk);
5367 5375
5368 tcp_parse_options(skb, &tp->rx_opt, 0); 5376 tcp_parse_options(skb, &tp->rx_opt, 0, dst);
5369 5377
5370 if (th->ack) { 5378 if (th->ack) {
5371 /* rfc793: 5379 /* rfc793:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7cda24b53f61..df18ce04f41e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -165,10 +165,10 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
165 nexthop = inet->opt->faddr; 165 nexthop = inet->opt->faddr;
166 } 166 }
167 167
168 tmp = ip_route_connect(&rt, nexthop, inet->saddr, 168 tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr,
169 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, 169 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
170 IPPROTO_TCP, 170 IPPROTO_TCP,
171 inet->sport, usin->sin_port, sk, 1); 171 inet->inet_sport, usin->sin_port, sk, 1);
172 if (tmp < 0) { 172 if (tmp < 0) {
173 if (tmp == -ENETUNREACH) 173 if (tmp == -ENETUNREACH)
174 IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); 174 IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
@@ -183,11 +183,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
183 if (!inet->opt || !inet->opt->srr) 183 if (!inet->opt || !inet->opt->srr)
184 daddr = rt->rt_dst; 184 daddr = rt->rt_dst;
185 185
186 if (!inet->saddr) 186 if (!inet->inet_saddr)
187 inet->saddr = rt->rt_src; 187 inet->inet_saddr = rt->rt_src;
188 inet->rcv_saddr = inet->saddr; 188 inet->inet_rcv_saddr = inet->inet_saddr;
189 189
190 if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) { 190 if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
191 /* Reset inherited state */ 191 /* Reset inherited state */
192 tp->rx_opt.ts_recent = 0; 192 tp->rx_opt.ts_recent = 0;
193 tp->rx_opt.ts_recent_stamp = 0; 193 tp->rx_opt.ts_recent_stamp = 0;
@@ -204,20 +204,20 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
204 * when trying new connection. 204 * when trying new connection.
205 */ 205 */
206 if (peer != NULL && 206 if (peer != NULL &&
207 peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) { 207 (u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
208 tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; 208 tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
209 tp->rx_opt.ts_recent = peer->tcp_ts; 209 tp->rx_opt.ts_recent = peer->tcp_ts;
210 } 210 }
211 } 211 }
212 212
213 inet->dport = usin->sin_port; 213 inet->inet_dport = usin->sin_port;
214 inet->daddr = daddr; 214 inet->inet_daddr = daddr;
215 215
216 inet_csk(sk)->icsk_ext_hdr_len = 0; 216 inet_csk(sk)->icsk_ext_hdr_len = 0;
217 if (inet->opt) 217 if (inet->opt)
218 inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; 218 inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
219 219
220 tp->rx_opt.mss_clamp = 536; 220 tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
221 221
222 /* Socket identity is still unknown (sport may be zero). 222 /* Socket identity is still unknown (sport may be zero).
223 * However we set state to SYN-SENT and not releasing socket 223 * However we set state to SYN-SENT and not releasing socket
@@ -230,7 +230,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
230 goto failure; 230 goto failure;
231 231
232 err = ip_route_newports(&rt, IPPROTO_TCP, 232 err = ip_route_newports(&rt, IPPROTO_TCP,
233 inet->sport, inet->dport, sk); 233 inet->inet_sport, inet->inet_dport, sk);
234 if (err) 234 if (err)
235 goto failure; 235 goto failure;
236 236
@@ -239,12 +239,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
239 sk_setup_caps(sk, &rt->u.dst); 239 sk_setup_caps(sk, &rt->u.dst);
240 240
241 if (!tp->write_seq) 241 if (!tp->write_seq)
242 tp->write_seq = secure_tcp_sequence_number(inet->saddr, 242 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
243 inet->daddr, 243 inet->inet_daddr,
244 inet->sport, 244 inet->inet_sport,
245 usin->sin_port); 245 usin->sin_port);
246 246
247 inet->id = tp->write_seq ^ jiffies; 247 inet->inet_id = tp->write_seq ^ jiffies;
248 248
249 err = tcp_connect(sk); 249 err = tcp_connect(sk);
250 rt = NULL; 250 rt = NULL;
@@ -261,7 +261,7 @@ failure:
261 tcp_set_state(sk, TCP_CLOSE); 261 tcp_set_state(sk, TCP_CLOSE);
262 ip_rt_put(rt); 262 ip_rt_put(rt);
263 sk->sk_route_caps = 0; 263 sk->sk_route_caps = 0;
264 inet->dport = 0; 264 inet->inet_dport = 0;
265 return err; 265 return err;
266} 266}
267 267
@@ -520,12 +520,13 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
520 struct tcphdr *th = tcp_hdr(skb); 520 struct tcphdr *th = tcp_hdr(skb);
521 521
522 if (skb->ip_summed == CHECKSUM_PARTIAL) { 522 if (skb->ip_summed == CHECKSUM_PARTIAL) {
523 th->check = ~tcp_v4_check(len, inet->saddr, 523 th->check = ~tcp_v4_check(len, inet->inet_saddr,
524 inet->daddr, 0); 524 inet->inet_daddr, 0);
525 skb->csum_start = skb_transport_header(skb) - skb->head; 525 skb->csum_start = skb_transport_header(skb) - skb->head;
526 skb->csum_offset = offsetof(struct tcphdr, check); 526 skb->csum_offset = offsetof(struct tcphdr, check);
527 } else { 527 } else {
528 th->check = tcp_v4_check(len, inet->saddr, inet->daddr, 528 th->check = tcp_v4_check(len, inet->inet_saddr,
529 inet->inet_daddr,
529 csum_partial(th, 530 csum_partial(th,
530 th->doff << 2, 531 th->doff << 2,
531 skb->csum)); 532 skb->csum));
@@ -848,7 +849,7 @@ static struct tcp_md5sig_key *
848struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, 849struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
849 struct sock *addr_sk) 850 struct sock *addr_sk)
850{ 851{
851 return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr); 852 return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
852} 853}
853 854
854EXPORT_SYMBOL(tcp_v4_md5_lookup); 855EXPORT_SYMBOL(tcp_v4_md5_lookup);
@@ -923,7 +924,7 @@ EXPORT_SYMBOL(tcp_v4_md5_do_add);
923static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk, 924static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
924 u8 *newkey, u8 newkeylen) 925 u8 *newkey, u8 newkeylen)
925{ 926{
926 return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr, 927 return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr,
927 newkey, newkeylen); 928 newkey, newkeylen);
928} 929}
929 930
@@ -1089,8 +1090,8 @@ int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1089 __be32 saddr, daddr; 1090 __be32 saddr, daddr;
1090 1091
1091 if (sk) { 1092 if (sk) {
1092 saddr = inet_sk(sk)->saddr; 1093 saddr = inet_sk(sk)->inet_saddr;
1093 daddr = inet_sk(sk)->daddr; 1094 daddr = inet_sk(sk)->inet_daddr;
1094 } else if (req) { 1095 } else if (req) {
1095 saddr = inet_rsk(req)->loc_addr; 1096 saddr = inet_rsk(req)->loc_addr;
1096 daddr = inet_rsk(req)->rmt_addr; 1097 daddr = inet_rsk(req)->rmt_addr;
@@ -1256,11 +1257,21 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1256 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops; 1257 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1257#endif 1258#endif
1258 1259
1260 ireq = inet_rsk(req);
1261 ireq->loc_addr = daddr;
1262 ireq->rmt_addr = saddr;
1263 ireq->no_srccheck = inet_sk(sk)->transparent;
1264 ireq->opt = tcp_v4_save_options(sk, skb);
1265
1266 dst = inet_csk_route_req(sk, req);
1267 if(!dst)
1268 goto drop_and_free;
1269
1259 tcp_clear_options(&tmp_opt); 1270 tcp_clear_options(&tmp_opt);
1260 tmp_opt.mss_clamp = 536; 1271 tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
1261 tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss; 1272 tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss;
1262 1273
1263 tcp_parse_options(skb, &tmp_opt, 0); 1274 tcp_parse_options(skb, &tmp_opt, 0, dst);
1264 1275
1265 if (want_cookie && !tmp_opt.saw_tstamp) 1276 if (want_cookie && !tmp_opt.saw_tstamp)
1266 tcp_clear_options(&tmp_opt); 1277 tcp_clear_options(&tmp_opt);
@@ -1269,14 +1280,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1269 1280
1270 tcp_openreq_init(req, &tmp_opt, skb); 1281 tcp_openreq_init(req, &tmp_opt, skb);
1271 1282
1272 ireq = inet_rsk(req);
1273 ireq->loc_addr = daddr;
1274 ireq->rmt_addr = saddr;
1275 ireq->no_srccheck = inet_sk(sk)->transparent;
1276 ireq->opt = tcp_v4_save_options(sk, skb);
1277
1278 if (security_inet_conn_request(sk, skb, req)) 1283 if (security_inet_conn_request(sk, skb, req))
1279 goto drop_and_free; 1284 goto drop_and_release;
1280 1285
1281 if (!want_cookie) 1286 if (!want_cookie)
1282 TCP_ECN_create_request(req, tcp_hdr(skb)); 1287 TCP_ECN_create_request(req, tcp_hdr(skb));
@@ -1301,10 +1306,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1301 */ 1306 */
1302 if (tmp_opt.saw_tstamp && 1307 if (tmp_opt.saw_tstamp &&
1303 tcp_death_row.sysctl_tw_recycle && 1308 tcp_death_row.sysctl_tw_recycle &&
1304 (dst = inet_csk_route_req(sk, req)) != NULL &&
1305 (peer = rt_get_peer((struct rtable *)dst)) != NULL && 1309 (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
1306 peer->v4daddr == saddr) { 1310 peer->v4daddr == saddr) {
1307 if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL && 1311 if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
1308 (s32)(peer->tcp_ts - req->ts_recent) > 1312 (s32)(peer->tcp_ts - req->ts_recent) >
1309 TCP_PAWS_WINDOW) { 1313 TCP_PAWS_WINDOW) {
1310 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); 1314 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
@@ -1380,9 +1384,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1380 newtp = tcp_sk(newsk); 1384 newtp = tcp_sk(newsk);
1381 newinet = inet_sk(newsk); 1385 newinet = inet_sk(newsk);
1382 ireq = inet_rsk(req); 1386 ireq = inet_rsk(req);
1383 newinet->daddr = ireq->rmt_addr; 1387 newinet->inet_daddr = ireq->rmt_addr;
1384 newinet->rcv_saddr = ireq->loc_addr; 1388 newinet->inet_rcv_saddr = ireq->loc_addr;
1385 newinet->saddr = ireq->loc_addr; 1389 newinet->inet_saddr = ireq->loc_addr;
1386 newinet->opt = ireq->opt; 1390 newinet->opt = ireq->opt;
1387 ireq->opt = NULL; 1391 ireq->opt = NULL;
1388 newinet->mc_index = inet_iif(skb); 1392 newinet->mc_index = inet_iif(skb);
@@ -1390,7 +1394,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1390 inet_csk(newsk)->icsk_ext_hdr_len = 0; 1394 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1391 if (newinet->opt) 1395 if (newinet->opt)
1392 inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen; 1396 inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
1393 newinet->id = newtp->write_seq ^ jiffies; 1397 newinet->inet_id = newtp->write_seq ^ jiffies;
1394 1398
1395 tcp_mtup_init(newsk); 1399 tcp_mtup_init(newsk);
1396 tcp_sync_mss(newsk, dst_mtu(dst)); 1400 tcp_sync_mss(newsk, dst_mtu(dst));
@@ -1403,7 +1407,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1403 1407
1404#ifdef CONFIG_TCP_MD5SIG 1408#ifdef CONFIG_TCP_MD5SIG
1405 /* Copy over the MD5 key from the original socket */ 1409 /* Copy over the MD5 key from the original socket */
1406 if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) { 1410 key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr);
1411 if (key != NULL) {
1407 /* 1412 /*
1408 * We're using one, so create a matching key 1413 * We're using one, so create a matching key
1409 * on the newsk structure. If we fail to get 1414 * on the newsk structure. If we fail to get
@@ -1412,7 +1417,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1412 */ 1417 */
1413 char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC); 1418 char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1414 if (newkey != NULL) 1419 if (newkey != NULL)
1415 tcp_v4_md5_do_add(newsk, newinet->daddr, 1420 tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
1416 newkey, key->keylen); 1421 newkey, key->keylen);
1417 newsk->sk_route_caps &= ~NETIF_F_GSO_MASK; 1422 newsk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1418 } 1423 }
@@ -1711,8 +1716,8 @@ int tcp_v4_remember_stamp(struct sock *sk)
1711 struct inet_peer *peer = NULL; 1716 struct inet_peer *peer = NULL;
1712 int release_it = 0; 1717 int release_it = 0;
1713 1718
1714 if (!rt || rt->rt_dst != inet->daddr) { 1719 if (!rt || rt->rt_dst != inet->inet_daddr) {
1715 peer = inet_getpeer(inet->daddr, 1); 1720 peer = inet_getpeer(inet->inet_daddr, 1);
1716 release_it = 1; 1721 release_it = 1;
1717 } else { 1722 } else {
1718 if (!rt->peer) 1723 if (!rt->peer)
@@ -1722,9 +1727,9 @@ int tcp_v4_remember_stamp(struct sock *sk)
1722 1727
1723 if (peer) { 1728 if (peer) {
1724 if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || 1729 if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
1725 (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() && 1730 ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
1726 peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) { 1731 peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) {
1727 peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp; 1732 peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
1728 peer->tcp_ts = tp->rx_opt.ts_recent; 1733 peer->tcp_ts = tp->rx_opt.ts_recent;
1729 } 1734 }
1730 if (release_it) 1735 if (release_it)
@@ -1743,9 +1748,9 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
1743 const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); 1748 const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
1744 1749
1745 if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || 1750 if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
1746 (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() && 1751 ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
1747 peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) { 1752 peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
1748 peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp; 1753 peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
1749 peer->tcp_ts = tcptw->tw_ts_recent; 1754 peer->tcp_ts = tcptw->tw_ts_recent;
1750 } 1755 }
1751 inet_putpeer(peer); 1756 inet_putpeer(peer);
@@ -1810,7 +1815,7 @@ static int tcp_v4_init_sock(struct sock *sk)
1810 */ 1815 */
1811 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; 1816 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
1812 tp->snd_cwnd_clamp = ~0; 1817 tp->snd_cwnd_clamp = ~0;
1813 tp->mss_cache = 536; 1818 tp->mss_cache = TCP_MSS_DEFAULT;
1814 1819
1815 tp->reordering = sysctl_tcp_reordering; 1820 tp->reordering = sysctl_tcp_reordering;
1816 icsk->icsk_ca_ops = &tcp_init_congestion_ops; 1821 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
@@ -2000,7 +2005,7 @@ static void *established_get_first(struct seq_file *seq)
2000 struct net *net = seq_file_net(seq); 2005 struct net *net = seq_file_net(seq);
2001 void *rc = NULL; 2006 void *rc = NULL;
2002 2007
2003 for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { 2008 for (st->bucket = 0; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2004 struct sock *sk; 2009 struct sock *sk;
2005 struct hlist_nulls_node *node; 2010 struct hlist_nulls_node *node;
2006 struct inet_timewait_sock *tw; 2011 struct inet_timewait_sock *tw;
@@ -2061,10 +2066,10 @@ get_tw:
2061 st->state = TCP_SEQ_STATE_ESTABLISHED; 2066 st->state = TCP_SEQ_STATE_ESTABLISHED;
2062 2067
2063 /* Look for next non empty bucket */ 2068 /* Look for next non empty bucket */
2064 while (++st->bucket < tcp_hashinfo.ehash_size && 2069 while (++st->bucket <= tcp_hashinfo.ehash_mask &&
2065 empty_bucket(st)) 2070 empty_bucket(st))
2066 ; 2071 ;
2067 if (st->bucket >= tcp_hashinfo.ehash_size) 2072 if (st->bucket > tcp_hashinfo.ehash_mask)
2068 return NULL; 2073 return NULL;
2069 2074
2070 spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); 2075 spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
@@ -2225,7 +2230,7 @@ static void get_openreq4(struct sock *sk, struct request_sock *req,
2225 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n", 2230 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n",
2226 i, 2231 i,
2227 ireq->loc_addr, 2232 ireq->loc_addr,
2228 ntohs(inet_sk(sk)->sport), 2233 ntohs(inet_sk(sk)->inet_sport),
2229 ireq->rmt_addr, 2234 ireq->rmt_addr,
2230 ntohs(ireq->rmt_port), 2235 ntohs(ireq->rmt_port),
2231 TCP_SYN_RECV, 2236 TCP_SYN_RECV,
@@ -2248,10 +2253,10 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2248 struct tcp_sock *tp = tcp_sk(sk); 2253 struct tcp_sock *tp = tcp_sk(sk);
2249 const struct inet_connection_sock *icsk = inet_csk(sk); 2254 const struct inet_connection_sock *icsk = inet_csk(sk);
2250 struct inet_sock *inet = inet_sk(sk); 2255 struct inet_sock *inet = inet_sk(sk);
2251 __be32 dest = inet->daddr; 2256 __be32 dest = inet->inet_daddr;
2252 __be32 src = inet->rcv_saddr; 2257 __be32 src = inet->inet_rcv_saddr;
2253 __u16 destp = ntohs(inet->dport); 2258 __u16 destp = ntohs(inet->inet_dport);
2254 __u16 srcp = ntohs(inet->sport); 2259 __u16 srcp = ntohs(inet->inet_sport);
2255 2260
2256 if (icsk->icsk_pending == ICSK_TIME_RETRANS) { 2261 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2257 timer_active = 1; 2262 timer_active = 1;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 4c03598ed924..4be22280e6b3 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -100,9 +100,9 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
100 struct tcp_options_received tmp_opt; 100 struct tcp_options_received tmp_opt;
101 int paws_reject = 0; 101 int paws_reject = 0;
102 102
103 tmp_opt.saw_tstamp = 0;
104 if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { 103 if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
105 tcp_parse_options(skb, &tmp_opt, 0); 104 tmp_opt.tstamp_ok = 1;
105 tcp_parse_options(skb, &tmp_opt, 1, NULL);
106 106
107 if (tmp_opt.saw_tstamp) { 107 if (tmp_opt.saw_tstamp) {
108 tmp_opt.ts_recent = tcptw->tw_ts_recent; 108 tmp_opt.ts_recent = tcptw->tw_ts_recent;
@@ -476,7 +476,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
476 if (newtp->af_specific->md5_lookup(sk, newsk)) 476 if (newtp->af_specific->md5_lookup(sk, newsk))
477 newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED; 477 newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
478#endif 478#endif
479 if (skb->len >= TCP_MIN_RCVMSS+newtp->tcp_header_len) 479 if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
480 newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; 480 newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
481 newtp->rx_opt.mss_clamp = req->mss; 481 newtp->rx_opt.mss_clamp = req->mss;
482 TCP_ECN_openreq_child(newtp, req); 482 TCP_ECN_openreq_child(newtp, req);
@@ -501,9 +501,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
501 struct tcp_options_received tmp_opt; 501 struct tcp_options_received tmp_opt;
502 struct sock *child; 502 struct sock *child;
503 503
504 tmp_opt.saw_tstamp = 0; 504 if ((th->doff > (sizeof(struct tcphdr)>>2)) && (req->ts_recent)) {
505 if (th->doff > (sizeof(struct tcphdr)>>2)) { 505 tmp_opt.tstamp_ok = 1;
506 tcp_parse_options(skb, &tmp_opt, 0); 506 tcp_parse_options(skb, &tmp_opt, 1, NULL);
507 507
508 if (tmp_opt.saw_tstamp) { 508 if (tmp_opt.saw_tstamp) {
509 tmp_opt.ts_recent = req->ts_recent; 509 tmp_opt.ts_recent = req->ts_recent;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index fcd278a7080e..616c686ca253 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -464,6 +464,7 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
464 struct tcp_md5sig_key **md5) { 464 struct tcp_md5sig_key **md5) {
465 struct tcp_sock *tp = tcp_sk(sk); 465 struct tcp_sock *tp = tcp_sk(sk);
466 unsigned size = 0; 466 unsigned size = 0;
467 struct dst_entry *dst = __sk_dst_get(sk);
467 468
468#ifdef CONFIG_TCP_MD5SIG 469#ifdef CONFIG_TCP_MD5SIG
469 *md5 = tp->af_specific->md5_lookup(sk, sk); 470 *md5 = tp->af_specific->md5_lookup(sk, sk);
@@ -487,18 +488,22 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
487 opts->mss = tcp_advertise_mss(sk); 488 opts->mss = tcp_advertise_mss(sk);
488 size += TCPOLEN_MSS_ALIGNED; 489 size += TCPOLEN_MSS_ALIGNED;
489 490
490 if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { 491 if (likely(sysctl_tcp_timestamps &&
492 !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP) &&
493 *md5 == NULL)) {
491 opts->options |= OPTION_TS; 494 opts->options |= OPTION_TS;
492 opts->tsval = TCP_SKB_CB(skb)->when; 495 opts->tsval = TCP_SKB_CB(skb)->when;
493 opts->tsecr = tp->rx_opt.ts_recent; 496 opts->tsecr = tp->rx_opt.ts_recent;
494 size += TCPOLEN_TSTAMP_ALIGNED; 497 size += TCPOLEN_TSTAMP_ALIGNED;
495 } 498 }
496 if (likely(sysctl_tcp_window_scaling)) { 499 if (likely(sysctl_tcp_window_scaling &&
500 !dst_feature(dst, RTAX_FEATURE_NO_WSCALE))) {
497 opts->ws = tp->rx_opt.rcv_wscale; 501 opts->ws = tp->rx_opt.rcv_wscale;
498 opts->options |= OPTION_WSCALE; 502 opts->options |= OPTION_WSCALE;
499 size += TCPOLEN_WSCALE_ALIGNED; 503 size += TCPOLEN_WSCALE_ALIGNED;
500 } 504 }
501 if (likely(sysctl_tcp_sack)) { 505 if (likely(sysctl_tcp_sack &&
506 !dst_feature(dst, RTAX_FEATURE_NO_SACK))) {
502 opts->options |= OPTION_SACK_ADVERTISE; 507 opts->options |= OPTION_SACK_ADVERTISE;
503 if (unlikely(!(OPTION_TS & opts->options))) 508 if (unlikely(!(OPTION_TS & opts->options)))
504 size += TCPOLEN_SACKPERM_ALIGNED; 509 size += TCPOLEN_SACKPERM_ALIGNED;
@@ -661,8 +666,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
661 666
662 /* Build TCP header and checksum it. */ 667 /* Build TCP header and checksum it. */
663 th = tcp_hdr(skb); 668 th = tcp_hdr(skb);
664 th->source = inet->sport; 669 th->source = inet->inet_sport;
665 th->dest = inet->dport; 670 th->dest = inet->inet_dport;
666 th->seq = htonl(tcb->seq); 671 th->seq = htonl(tcb->seq);
667 th->ack_seq = htonl(tp->rcv_nxt); 672 th->ack_seq = htonl(tp->rcv_nxt);
668 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | 673 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
@@ -2315,7 +2320,9 @@ static void tcp_connect_init(struct sock *sk)
2315 * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT. 2320 * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
2316 */ 2321 */
2317 tp->tcp_header_len = sizeof(struct tcphdr) + 2322 tp->tcp_header_len = sizeof(struct tcphdr) +
2318 (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0); 2323 (sysctl_tcp_timestamps &&
2324 (!dst_feature(dst, RTAX_FEATURE_NO_TSTAMP) ?
2325 TCPOLEN_TSTAMP_ALIGNED : 0));
2319 2326
2320#ifdef CONFIG_TCP_MD5SIG 2327#ifdef CONFIG_TCP_MD5SIG
2321 if (tp->af_specific->md5_lookup(sk, sk) != NULL) 2328 if (tp->af_specific->md5_lookup(sk, sk) != NULL)
@@ -2341,7 +2348,8 @@ static void tcp_connect_init(struct sock *sk)
2341 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), 2348 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
2342 &tp->rcv_wnd, 2349 &tp->rcv_wnd,
2343 &tp->window_clamp, 2350 &tp->window_clamp,
2344 sysctl_tcp_window_scaling, 2351 (sysctl_tcp_window_scaling &&
2352 !dst_feature(dst, RTAX_FEATURE_NO_WSCALE)),
2345 &rcv_wscale); 2353 &rcv_wscale);
2346 2354
2347 tp->rx_opt.rcv_wscale = rcv_wscale; 2355 tp->rx_opt.rcv_wscale = rcv_wscale;
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 59f5b5e7c566..7a3cc2ffad84 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -94,7 +94,8 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
94 const struct inet_sock *inet = inet_sk(sk); 94 const struct inet_sock *inet = inet_sk(sk);
95 95
96 /* Only update if port matches */ 96 /* Only update if port matches */
97 if ((port == 0 || ntohs(inet->dport) == port || ntohs(inet->sport) == port) 97 if ((port == 0 || ntohs(inet->inet_dport) == port ||
98 ntohs(inet->inet_sport) == port)
98 && (full || tp->snd_cwnd != tcp_probe.lastcwnd)) { 99 && (full || tp->snd_cwnd != tcp_probe.lastcwnd)) {
99 100
100 spin_lock(&tcp_probe.lock); 101 spin_lock(&tcp_probe.lock);
@@ -103,10 +104,10 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
103 struct tcp_log *p = tcp_probe.log + tcp_probe.head; 104 struct tcp_log *p = tcp_probe.log + tcp_probe.head;
104 105
105 p->tstamp = ktime_get(); 106 p->tstamp = ktime_get();
106 p->saddr = inet->saddr; 107 p->saddr = inet->inet_saddr;
107 p->sport = inet->sport; 108 p->sport = inet->inet_sport;
108 p->daddr = inet->daddr; 109 p->daddr = inet->inet_daddr;
109 p->dport = inet->dport; 110 p->dport = inet->inet_dport;
110 p->length = skb->len; 111 p->length = skb->len;
111 p->snd_nxt = tp->snd_nxt; 112 p->snd_nxt = tp->snd_nxt;
112 p->snd_una = tp->snd_una; 113 p->snd_una = tp->snd_una;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index cdb2ca7684d4..8353a538cd4c 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -141,14 +141,14 @@ static int tcp_write_timeout(struct sock *sk)
141 141
142 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { 142 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
143 if (icsk->icsk_retransmits) 143 if (icsk->icsk_retransmits)
144 dst_negative_advice(&sk->sk_dst_cache); 144 dst_negative_advice(&sk->sk_dst_cache, sk);
145 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; 145 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
146 } else { 146 } else {
147 if (retransmits_timed_out(sk, sysctl_tcp_retries1)) { 147 if (retransmits_timed_out(sk, sysctl_tcp_retries1)) {
148 /* Black hole detection */ 148 /* Black hole detection */
149 tcp_mtu_probing(icsk, sk); 149 tcp_mtu_probing(icsk, sk);
150 150
151 dst_negative_advice(&sk->sk_dst_cache); 151 dst_negative_advice(&sk->sk_dst_cache, sk);
152 } 152 }
153 153
154 retry_until = sysctl_tcp_retries2; 154 retry_until = sysctl_tcp_retries2;
@@ -303,15 +303,15 @@ void tcp_retransmit_timer(struct sock *sk)
303 struct inet_sock *inet = inet_sk(sk); 303 struct inet_sock *inet = inet_sk(sk);
304 if (sk->sk_family == AF_INET) { 304 if (sk->sk_family == AF_INET) {
305 LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", 305 LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
306 &inet->daddr, ntohs(inet->dport), 306 &inet->inet_daddr, ntohs(inet->inet_dport),
307 inet->num, tp->snd_una, tp->snd_nxt); 307 inet->inet_num, tp->snd_una, tp->snd_nxt);
308 } 308 }
309#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 309#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
310 else if (sk->sk_family == AF_INET6) { 310 else if (sk->sk_family == AF_INET6) {
311 struct ipv6_pinfo *np = inet6_sk(sk); 311 struct ipv6_pinfo *np = inet6_sk(sk);
312 LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", 312 LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
313 &np->daddr, ntohs(inet->dport), 313 &np->daddr, ntohs(inet->inet_dport),
314 inet->num, tp->snd_una, tp->snd_nxt); 314 inet->inet_num, tp->snd_una, tp->snd_nxt);
315 } 315 }
316#endif 316#endif
317#endif 317#endif
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0fa9f70e4b19..1eaf57567ebf 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -106,7 +106,7 @@
106#include <net/xfrm.h> 106#include <net/xfrm.h>
107#include "udp_impl.h" 107#include "udp_impl.h"
108 108
109struct udp_table udp_table; 109struct udp_table udp_table __read_mostly;
110EXPORT_SYMBOL(udp_table); 110EXPORT_SYMBOL(udp_table);
111 111
112int sysctl_udp_mem[3] __read_mostly; 112int sysctl_udp_mem[3] __read_mostly;
@@ -121,14 +121,16 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min);
121atomic_t udp_memory_allocated; 121atomic_t udp_memory_allocated;
122EXPORT_SYMBOL(udp_memory_allocated); 122EXPORT_SYMBOL(udp_memory_allocated);
123 123
124#define PORTS_PER_CHAIN (65536 / UDP_HTABLE_SIZE) 124#define MAX_UDP_PORTS 65536
125#define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN)
125 126
126static int udp_lib_lport_inuse(struct net *net, __u16 num, 127static int udp_lib_lport_inuse(struct net *net, __u16 num,
127 const struct udp_hslot *hslot, 128 const struct udp_hslot *hslot,
128 unsigned long *bitmap, 129 unsigned long *bitmap,
129 struct sock *sk, 130 struct sock *sk,
130 int (*saddr_comp)(const struct sock *sk1, 131 int (*saddr_comp)(const struct sock *sk1,
131 const struct sock *sk2)) 132 const struct sock *sk2),
133 unsigned int log)
132{ 134{
133 struct sock *sk2; 135 struct sock *sk2;
134 struct hlist_nulls_node *node; 136 struct hlist_nulls_node *node;
@@ -136,13 +138,13 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
136 sk_nulls_for_each(sk2, node, &hslot->head) 138 sk_nulls_for_each(sk2, node, &hslot->head)
137 if (net_eq(sock_net(sk2), net) && 139 if (net_eq(sock_net(sk2), net) &&
138 sk2 != sk && 140 sk2 != sk &&
139 (bitmap || sk2->sk_hash == num) && 141 (bitmap || udp_sk(sk2)->udp_port_hash == num) &&
140 (!sk2->sk_reuse || !sk->sk_reuse) && 142 (!sk2->sk_reuse || !sk->sk_reuse) &&
141 (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if 143 (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
142 || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && 144 || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
143 (*saddr_comp)(sk, sk2)) { 145 (*saddr_comp)(sk, sk2)) {
144 if (bitmap) 146 if (bitmap)
145 __set_bit(sk2->sk_hash / UDP_HTABLE_SIZE, 147 __set_bit(udp_sk(sk2)->udp_port_hash >> log,
146 bitmap); 148 bitmap);
147 else 149 else
148 return 1; 150 return 1;
@@ -150,18 +152,51 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
150 return 0; 152 return 0;
151} 153}
152 154
155/*
156 * Note: we still hold spinlock of primary hash chain, so no other writer
157 * can insert/delete a socket with local_port == num
158 */
159static int udp_lib_lport_inuse2(struct net *net, __u16 num,
160 struct udp_hslot *hslot2,
161 struct sock *sk,
162 int (*saddr_comp)(const struct sock *sk1,
163 const struct sock *sk2))
164{
165 struct sock *sk2;
166 struct hlist_nulls_node *node;
167 int res = 0;
168
169 spin_lock(&hslot2->lock);
170 udp_portaddr_for_each_entry(sk2, node, &hslot2->head)
171 if (net_eq(sock_net(sk2), net) &&
172 sk2 != sk &&
173 (udp_sk(sk2)->udp_port_hash == num) &&
174 (!sk2->sk_reuse || !sk->sk_reuse) &&
175 (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
176 || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
177 (*saddr_comp)(sk, sk2)) {
178 res = 1;
179 break;
180 }
181 spin_unlock(&hslot2->lock);
182 return res;
183}
184
153/** 185/**
154 * udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6 186 * udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6
155 * 187 *
156 * @sk: socket struct in question 188 * @sk: socket struct in question
157 * @snum: port number to look up 189 * @snum: port number to look up
158 * @saddr_comp: AF-dependent comparison of bound local IP addresses 190 * @saddr_comp: AF-dependent comparison of bound local IP addresses
191 * @hash2_nulladdr: AF-dependant hash value in secondary hash chains,
192 * with NULL address
159 */ 193 */
160int udp_lib_get_port(struct sock *sk, unsigned short snum, 194int udp_lib_get_port(struct sock *sk, unsigned short snum,
161 int (*saddr_comp)(const struct sock *sk1, 195 int (*saddr_comp)(const struct sock *sk1,
162 const struct sock *sk2)) 196 const struct sock *sk2),
197 unsigned int hash2_nulladdr)
163{ 198{
164 struct udp_hslot *hslot; 199 struct udp_hslot *hslot, *hslot2;
165 struct udp_table *udptable = sk->sk_prot->h.udp_table; 200 struct udp_table *udptable = sk->sk_prot->h.udp_table;
166 int error = 1; 201 int error = 1;
167 struct net *net = sock_net(sk); 202 struct net *net = sock_net(sk);
@@ -180,13 +215,15 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
180 /* 215 /*
181 * force rand to be an odd multiple of UDP_HTABLE_SIZE 216 * force rand to be an odd multiple of UDP_HTABLE_SIZE
182 */ 217 */
183 rand = (rand | 1) * UDP_HTABLE_SIZE; 218 rand = (rand | 1) * (udptable->mask + 1);
184 for (last = first + UDP_HTABLE_SIZE; first != last; first++) { 219 for (last = first + udptable->mask + 1;
185 hslot = &udptable->hash[udp_hashfn(net, first)]; 220 first != last;
221 first++) {
222 hslot = udp_hashslot(udptable, net, first);
186 bitmap_zero(bitmap, PORTS_PER_CHAIN); 223 bitmap_zero(bitmap, PORTS_PER_CHAIN);
187 spin_lock_bh(&hslot->lock); 224 spin_lock_bh(&hslot->lock);
188 udp_lib_lport_inuse(net, snum, hslot, bitmap, sk, 225 udp_lib_lport_inuse(net, snum, hslot, bitmap, sk,
189 saddr_comp); 226 saddr_comp, udptable->log);
190 227
191 snum = first; 228 snum = first;
192 /* 229 /*
@@ -196,7 +233,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
196 */ 233 */
197 do { 234 do {
198 if (low <= snum && snum <= high && 235 if (low <= snum && snum <= high &&
199 !test_bit(snum / UDP_HTABLE_SIZE, bitmap)) 236 !test_bit(snum >> udptable->log, bitmap))
200 goto found; 237 goto found;
201 snum += rand; 238 snum += rand;
202 } while (snum != first); 239 } while (snum != first);
@@ -204,17 +241,51 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
204 } 241 }
205 goto fail; 242 goto fail;
206 } else { 243 } else {
207 hslot = &udptable->hash[udp_hashfn(net, snum)]; 244 hslot = udp_hashslot(udptable, net, snum);
208 spin_lock_bh(&hslot->lock); 245 spin_lock_bh(&hslot->lock);
209 if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, saddr_comp)) 246 if (hslot->count > 10) {
247 int exist;
248 unsigned int slot2 = udp_sk(sk)->udp_portaddr_hash ^ snum;
249
250 slot2 &= udptable->mask;
251 hash2_nulladdr &= udptable->mask;
252
253 hslot2 = udp_hashslot2(udptable, slot2);
254 if (hslot->count < hslot2->count)
255 goto scan_primary_hash;
256
257 exist = udp_lib_lport_inuse2(net, snum, hslot2,
258 sk, saddr_comp);
259 if (!exist && (hash2_nulladdr != slot2)) {
260 hslot2 = udp_hashslot2(udptable, hash2_nulladdr);
261 exist = udp_lib_lport_inuse2(net, snum, hslot2,
262 sk, saddr_comp);
263 }
264 if (exist)
265 goto fail_unlock;
266 else
267 goto found;
268 }
269scan_primary_hash:
270 if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk,
271 saddr_comp, 0))
210 goto fail_unlock; 272 goto fail_unlock;
211 } 273 }
212found: 274found:
213 inet_sk(sk)->num = snum; 275 inet_sk(sk)->inet_num = snum;
214 sk->sk_hash = snum; 276 udp_sk(sk)->udp_port_hash = snum;
277 udp_sk(sk)->udp_portaddr_hash ^= snum;
215 if (sk_unhashed(sk)) { 278 if (sk_unhashed(sk)) {
216 sk_nulls_add_node_rcu(sk, &hslot->head); 279 sk_nulls_add_node_rcu(sk, &hslot->head);
280 hslot->count++;
217 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 281 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
282
283 hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
284 spin_lock(&hslot2->lock);
285 hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
286 &hslot2->head);
287 hslot2->count++;
288 spin_unlock(&hslot2->lock);
218 } 289 }
219 error = 0; 290 error = 0;
220fail_unlock: 291fail_unlock:
@@ -229,13 +300,26 @@ static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
229 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); 300 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
230 301
231 return (!ipv6_only_sock(sk2) && 302 return (!ipv6_only_sock(sk2) &&
232 (!inet1->rcv_saddr || !inet2->rcv_saddr || 303 (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr ||
233 inet1->rcv_saddr == inet2->rcv_saddr)); 304 inet1->inet_rcv_saddr == inet2->inet_rcv_saddr));
305}
306
307static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr,
308 unsigned int port)
309{
310 return jhash_1word(saddr, net_hash_mix(net)) ^ port;
234} 311}
235 312
236int udp_v4_get_port(struct sock *sk, unsigned short snum) 313int udp_v4_get_port(struct sock *sk, unsigned short snum)
237{ 314{
238 return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal); 315 unsigned int hash2_nulladdr =
316 udp4_portaddr_hash(sock_net(sk), INADDR_ANY, snum);
317 unsigned int hash2_partial =
318 udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0);
319
320 /* precompute partial secondary hash */
321 udp_sk(sk)->udp_portaddr_hash = hash2_partial;
322 return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr);
239} 323}
240 324
241static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, 325static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr,
@@ -244,23 +328,23 @@ static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr,
244{ 328{
245 int score = -1; 329 int score = -1;
246 330
247 if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && 331 if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
248 !ipv6_only_sock(sk)) { 332 !ipv6_only_sock(sk)) {
249 struct inet_sock *inet = inet_sk(sk); 333 struct inet_sock *inet = inet_sk(sk);
250 334
251 score = (sk->sk_family == PF_INET ? 1 : 0); 335 score = (sk->sk_family == PF_INET ? 1 : 0);
252 if (inet->rcv_saddr) { 336 if (inet->inet_rcv_saddr) {
253 if (inet->rcv_saddr != daddr) 337 if (inet->inet_rcv_saddr != daddr)
254 return -1; 338 return -1;
255 score += 2; 339 score += 2;
256 } 340 }
257 if (inet->daddr) { 341 if (inet->inet_daddr) {
258 if (inet->daddr != saddr) 342 if (inet->inet_daddr != saddr)
259 return -1; 343 return -1;
260 score += 2; 344 score += 2;
261 } 345 }
262 if (inet->dport) { 346 if (inet->inet_dport) {
263 if (inet->dport != sport) 347 if (inet->inet_dport != sport)
264 return -1; 348 return -1;
265 score += 2; 349 score += 2;
266 } 350 }
@@ -273,6 +357,89 @@ static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr,
273 return score; 357 return score;
274} 358}
275 359
360/*
361 * In this second variant, we check (daddr, dport) matches (inet_rcv_sadd, inet_num)
362 */
363#define SCORE2_MAX (1 + 2 + 2 + 2)
364static inline int compute_score2(struct sock *sk, struct net *net,
365 __be32 saddr, __be16 sport,
366 __be32 daddr, unsigned int hnum, int dif)
367{
368 int score = -1;
369
370 if (net_eq(sock_net(sk), net) && !ipv6_only_sock(sk)) {
371 struct inet_sock *inet = inet_sk(sk);
372
373 if (inet->inet_rcv_saddr != daddr)
374 return -1;
375 if (inet->inet_num != hnum)
376 return -1;
377
378 score = (sk->sk_family == PF_INET ? 1 : 0);
379 if (inet->inet_daddr) {
380 if (inet->inet_daddr != saddr)
381 return -1;
382 score += 2;
383 }
384 if (inet->inet_dport) {
385 if (inet->inet_dport != sport)
386 return -1;
387 score += 2;
388 }
389 if (sk->sk_bound_dev_if) {
390 if (sk->sk_bound_dev_if != dif)
391 return -1;
392 score += 2;
393 }
394 }
395 return score;
396}
397
398
399/* called with read_rcu_lock() */
400static struct sock *udp4_lib_lookup2(struct net *net,
401 __be32 saddr, __be16 sport,
402 __be32 daddr, unsigned int hnum, int dif,
403 struct udp_hslot *hslot2, unsigned int slot2)
404{
405 struct sock *sk, *result;
406 struct hlist_nulls_node *node;
407 int score, badness;
408
409begin:
410 result = NULL;
411 badness = -1;
412 udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) {
413 score = compute_score2(sk, net, saddr, sport,
414 daddr, hnum, dif);
415 if (score > badness) {
416 result = sk;
417 badness = score;
418 if (score == SCORE2_MAX)
419 goto exact_match;
420 }
421 }
422 /*
423 * if the nulls value we got at the end of this lookup is
424 * not the expected one, we must restart lookup.
425 * We probably met an item that was moved to another chain.
426 */
427 if (get_nulls_value(node) != slot2)
428 goto begin;
429
430 if (result) {
431exact_match:
432 if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
433 result = NULL;
434 else if (unlikely(compute_score2(result, net, saddr, sport,
435 daddr, hnum, dif) < badness)) {
436 sock_put(result);
437 goto begin;
438 }
439 }
440 return result;
441}
442
276/* UDP is nearly always wildcards out the wazoo, it makes no sense to try 443/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
277 * harder than this. -DaveM 444 * harder than this. -DaveM
278 */ 445 */
@@ -283,11 +450,35 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
283 struct sock *sk, *result; 450 struct sock *sk, *result;
284 struct hlist_nulls_node *node; 451 struct hlist_nulls_node *node;
285 unsigned short hnum = ntohs(dport); 452 unsigned short hnum = ntohs(dport);
286 unsigned int hash = udp_hashfn(net, hnum); 453 unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
287 struct udp_hslot *hslot = &udptable->hash[hash]; 454 struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
288 int score, badness; 455 int score, badness;
289 456
290 rcu_read_lock(); 457 rcu_read_lock();
458 if (hslot->count > 10) {
459 hash2 = udp4_portaddr_hash(net, daddr, hnum);
460 slot2 = hash2 & udptable->mask;
461 hslot2 = &udptable->hash2[slot2];
462 if (hslot->count < hslot2->count)
463 goto begin;
464
465 result = udp4_lib_lookup2(net, saddr, sport,
466 daddr, hnum, dif,
467 hslot2, slot2);
468 if (!result) {
469 hash2 = udp4_portaddr_hash(net, INADDR_ANY, hnum);
470 slot2 = hash2 & udptable->mask;
471 hslot2 = &udptable->hash2[slot2];
472 if (hslot->count < hslot2->count)
473 goto begin;
474
475 result = udp4_lib_lookup2(net, INADDR_ANY, sport,
476 daddr, hnum, dif,
477 hslot2, slot2);
478 }
479 rcu_read_unlock();
480 return result;
481 }
291begin: 482begin:
292 result = NULL; 483 result = NULL;
293 badness = -1; 484 badness = -1;
@@ -304,7 +495,7 @@ begin:
304 * not the expected one, we must restart lookup. 495 * not the expected one, we must restart lookup.
305 * We probably met an item that was moved to another chain. 496 * We probably met an item that was moved to another chain.
306 */ 497 */
307 if (get_nulls_value(node) != hash) 498 if (get_nulls_value(node) != slot)
308 goto begin; 499 goto begin;
309 500
310 if (result) { 501 if (result) {
@@ -355,10 +546,11 @@ static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk,
355 struct inet_sock *inet = inet_sk(s); 546 struct inet_sock *inet = inet_sk(s);
356 547
357 if (!net_eq(sock_net(s), net) || 548 if (!net_eq(sock_net(s), net) ||
358 s->sk_hash != hnum || 549 udp_sk(s)->udp_port_hash != hnum ||
359 (inet->daddr && inet->daddr != rmt_addr) || 550 (inet->inet_daddr && inet->inet_daddr != rmt_addr) ||
360 (inet->dport != rmt_port && inet->dport) || 551 (inet->inet_dport != rmt_port && inet->inet_dport) ||
361 (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || 552 (inet->inet_rcv_saddr &&
553 inet->inet_rcv_saddr != loc_addr) ||
362 ipv6_only_sock(s) || 554 ipv6_only_sock(s) ||
363 (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) 555 (s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
364 continue; 556 continue;
@@ -642,14 +834,14 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
642 } else { 834 } else {
643 if (sk->sk_state != TCP_ESTABLISHED) 835 if (sk->sk_state != TCP_ESTABLISHED)
644 return -EDESTADDRREQ; 836 return -EDESTADDRREQ;
645 daddr = inet->daddr; 837 daddr = inet->inet_daddr;
646 dport = inet->dport; 838 dport = inet->inet_dport;
647 /* Open fast path for connected socket. 839 /* Open fast path for connected socket.
648 Route will not be used, if at least one option is set. 840 Route will not be used, if at least one option is set.
649 */ 841 */
650 connected = 1; 842 connected = 1;
651 } 843 }
652 ipc.addr = inet->saddr; 844 ipc.addr = inet->inet_saddr;
653 845
654 ipc.oif = sk->sk_bound_dev_if; 846 ipc.oif = sk->sk_bound_dev_if;
655 err = sock_tx_timestamp(msg, sk, &ipc.shtx); 847 err = sock_tx_timestamp(msg, sk, &ipc.shtx);
@@ -704,7 +896,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
704 .proto = sk->sk_protocol, 896 .proto = sk->sk_protocol,
705 .flags = inet_sk_flowi_flags(sk), 897 .flags = inet_sk_flowi_flags(sk),
706 .uli_u = { .ports = 898 .uli_u = { .ports =
707 { .sport = inet->sport, 899 { .sport = inet->inet_sport,
708 .dport = dport } } }; 900 .dport = dport } } };
709 struct net *net = sock_net(sk); 901 struct net *net = sock_net(sk);
710 902
@@ -748,7 +940,7 @@ back_from_confirm:
748 inet->cork.fl.fl4_dst = daddr; 940 inet->cork.fl.fl4_dst = daddr;
749 inet->cork.fl.fl_ip_dport = dport; 941 inet->cork.fl.fl_ip_dport = dport;
750 inet->cork.fl.fl4_src = saddr; 942 inet->cork.fl.fl4_src = saddr;
751 inet->cork.fl.fl_ip_sport = inet->sport; 943 inet->cork.fl.fl_ip_sport = inet->inet_sport;
752 up->pending = AF_INET; 944 up->pending = AF_INET;
753 945
754do_append_data: 946do_append_data:
@@ -862,6 +1054,7 @@ static unsigned int first_packet_length(struct sock *sk)
862 udp_lib_checksum_complete(skb)) { 1054 udp_lib_checksum_complete(skb)) {
863 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, 1055 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS,
864 IS_UDPLITE(sk)); 1056 IS_UDPLITE(sk));
1057 atomic_inc(&sk->sk_drops);
865 __skb_unlink(skb, rcvq); 1058 __skb_unlink(skb, rcvq);
866 __skb_queue_tail(&list_kill, skb); 1059 __skb_queue_tail(&list_kill, skb);
867 } 1060 }
@@ -982,7 +1175,7 @@ try_again:
982 UDP_INC_STATS_USER(sock_net(sk), 1175 UDP_INC_STATS_USER(sock_net(sk),
983 UDP_MIB_INDATAGRAMS, is_udplite); 1176 UDP_MIB_INDATAGRAMS, is_udplite);
984 1177
985 sock_recv_timestamp(msg, sk, skb); 1178 sock_recv_ts_and_drops(msg, sk, skb);
986 1179
987 /* Copy the address. */ 1180 /* Copy the address. */
988 if (sin) { 1181 if (sin) {
@@ -1023,15 +1216,15 @@ int udp_disconnect(struct sock *sk, int flags)
1023 */ 1216 */
1024 1217
1025 sk->sk_state = TCP_CLOSE; 1218 sk->sk_state = TCP_CLOSE;
1026 inet->daddr = 0; 1219 inet->inet_daddr = 0;
1027 inet->dport = 0; 1220 inet->inet_dport = 0;
1028 sk->sk_bound_dev_if = 0; 1221 sk->sk_bound_dev_if = 0;
1029 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) 1222 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
1030 inet_reset_saddr(sk); 1223 inet_reset_saddr(sk);
1031 1224
1032 if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) { 1225 if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) {
1033 sk->sk_prot->unhash(sk); 1226 sk->sk_prot->unhash(sk);
1034 inet->sport = 0; 1227 inet->inet_sport = 0;
1035 } 1228 }
1036 sk_dst_reset(sk); 1229 sk_dst_reset(sk);
1037 return 0; 1230 return 0;
@@ -1042,13 +1235,22 @@ void udp_lib_unhash(struct sock *sk)
1042{ 1235{
1043 if (sk_hashed(sk)) { 1236 if (sk_hashed(sk)) {
1044 struct udp_table *udptable = sk->sk_prot->h.udp_table; 1237 struct udp_table *udptable = sk->sk_prot->h.udp_table;
1045 unsigned int hash = udp_hashfn(sock_net(sk), sk->sk_hash); 1238 struct udp_hslot *hslot, *hslot2;
1046 struct udp_hslot *hslot = &udptable->hash[hash]; 1239
1240 hslot = udp_hashslot(udptable, sock_net(sk),
1241 udp_sk(sk)->udp_port_hash);
1242 hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
1047 1243
1048 spin_lock_bh(&hslot->lock); 1244 spin_lock_bh(&hslot->lock);
1049 if (sk_nulls_del_node_init_rcu(sk)) { 1245 if (sk_nulls_del_node_init_rcu(sk)) {
1050 inet_sk(sk)->num = 0; 1246 hslot->count--;
1247 inet_sk(sk)->inet_num = 0;
1051 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 1248 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
1249
1250 spin_lock(&hslot2->lock);
1251 hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
1252 hslot2->count--;
1253 spin_unlock(&hslot2->lock);
1052 } 1254 }
1053 spin_unlock_bh(&hslot->lock); 1255 spin_unlock_bh(&hslot->lock);
1054 } 1256 }
@@ -1057,25 +1259,22 @@ EXPORT_SYMBOL(udp_lib_unhash);
1057 1259
1058static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) 1260static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1059{ 1261{
1060 int is_udplite = IS_UDPLITE(sk); 1262 int rc = sock_queue_rcv_skb(sk, skb);
1061 int rc; 1263
1264 if (rc < 0) {
1265 int is_udplite = IS_UDPLITE(sk);
1062 1266
1063 if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) {
1064 /* Note that an ENOMEM error is charged twice */ 1267 /* Note that an ENOMEM error is charged twice */
1065 if (rc == -ENOMEM) { 1268 if (rc == -ENOMEM)
1066 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, 1269 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
1067 is_udplite); 1270 is_udplite);
1068 atomic_inc(&sk->sk_drops); 1271 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
1069 } 1272 kfree_skb(skb);
1070 goto drop; 1273 return -1;
1071 } 1274 }
1072 1275
1073 return 0; 1276 return 0;
1074 1277
1075drop:
1076 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
1077 kfree_skb(skb);
1078 return -1;
1079} 1278}
1080 1279
1081/* returns: 1280/* returns:
@@ -1182,53 +1381,88 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1182 1381
1183drop: 1382drop:
1184 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); 1383 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
1384 atomic_inc(&sk->sk_drops);
1185 kfree_skb(skb); 1385 kfree_skb(skb);
1186 return -1; 1386 return -1;
1187} 1387}
1188 1388
1389
1390static void flush_stack(struct sock **stack, unsigned int count,
1391 struct sk_buff *skb, unsigned int final)
1392{
1393 unsigned int i;
1394 struct sk_buff *skb1 = NULL;
1395 struct sock *sk;
1396
1397 for (i = 0; i < count; i++) {
1398 sk = stack[i];
1399 if (likely(skb1 == NULL))
1400 skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC);
1401
1402 if (!skb1) {
1403 atomic_inc(&sk->sk_drops);
1404 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
1405 IS_UDPLITE(sk));
1406 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS,
1407 IS_UDPLITE(sk));
1408 }
1409
1410 if (skb1 && udp_queue_rcv_skb(sk, skb1) <= 0)
1411 skb1 = NULL;
1412 }
1413 if (unlikely(skb1))
1414 kfree_skb(skb1);
1415}
1416
1189/* 1417/*
1190 * Multicasts and broadcasts go to each listener. 1418 * Multicasts and broadcasts go to each listener.
1191 * 1419 *
1192 * Note: called only from the BH handler context, 1420 * Note: called only from the BH handler context.
1193 * so we don't need to lock the hashes.
1194 */ 1421 */
1195static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, 1422static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
1196 struct udphdr *uh, 1423 struct udphdr *uh,
1197 __be32 saddr, __be32 daddr, 1424 __be32 saddr, __be32 daddr,
1198 struct udp_table *udptable) 1425 struct udp_table *udptable)
1199{ 1426{
1200 struct sock *sk; 1427 struct sock *sk, *stack[256 / sizeof(struct sock *)];
1201 struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))]; 1428 struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
1202 int dif; 1429 int dif;
1430 unsigned int i, count = 0;
1203 1431
1204 spin_lock(&hslot->lock); 1432 spin_lock(&hslot->lock);
1205 sk = sk_nulls_head(&hslot->head); 1433 sk = sk_nulls_head(&hslot->head);
1206 dif = skb->dev->ifindex; 1434 dif = skb->dev->ifindex;
1207 sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); 1435 sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
1208 if (sk) { 1436 while (sk) {
1209 struct sock *sknext = NULL; 1437 stack[count++] = sk;
1210 1438 sk = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest,
1211 do { 1439 daddr, uh->source, saddr, dif);
1212 struct sk_buff *skb1 = skb; 1440 if (unlikely(count == ARRAY_SIZE(stack))) {
1213 1441 if (!sk)
1214 sknext = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest, 1442 break;
1215 daddr, uh->source, saddr, 1443 flush_stack(stack, count, skb, ~0);
1216 dif); 1444 count = 0;
1217 if (sknext) 1445 }
1218 skb1 = skb_clone(skb, GFP_ATOMIC); 1446 }
1219 1447 /*
1220 if (skb1) { 1448 * before releasing chain lock, we must take a reference on sockets
1221 int ret = udp_queue_rcv_skb(sk, skb1); 1449 */
1222 if (ret > 0) 1450 for (i = 0; i < count; i++)
1223 /* we should probably re-process instead 1451 sock_hold(stack[i]);
1224 * of dropping packets here. */ 1452
1225 kfree_skb(skb1);
1226 }
1227 sk = sknext;
1228 } while (sknext);
1229 } else
1230 consume_skb(skb);
1231 spin_unlock(&hslot->lock); 1453 spin_unlock(&hslot->lock);
1454
1455 /*
1456 * do the slow work with no lock held
1457 */
1458 if (count) {
1459 flush_stack(stack, count, skb, count - 1);
1460
1461 for (i = 0; i < count; i++)
1462 sock_put(stack[i]);
1463 } else {
1464 kfree_skb(skb);
1465 }
1232 return 0; 1466 return 0;
1233} 1467}
1234 1468
@@ -1620,9 +1854,14 @@ static struct sock *udp_get_first(struct seq_file *seq, int start)
1620 struct udp_iter_state *state = seq->private; 1854 struct udp_iter_state *state = seq->private;
1621 struct net *net = seq_file_net(seq); 1855 struct net *net = seq_file_net(seq);
1622 1856
1623 for (state->bucket = start; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { 1857 for (state->bucket = start; state->bucket <= state->udp_table->mask;
1858 ++state->bucket) {
1624 struct hlist_nulls_node *node; 1859 struct hlist_nulls_node *node;
1625 struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; 1860 struct udp_hslot *hslot = &state->udp_table->hash[state->bucket];
1861
1862 if (hlist_nulls_empty(&hslot->head))
1863 continue;
1864
1626 spin_lock_bh(&hslot->lock); 1865 spin_lock_bh(&hslot->lock);
1627 sk_nulls_for_each(sk, node, &hslot->head) { 1866 sk_nulls_for_each(sk, node, &hslot->head) {
1628 if (!net_eq(sock_net(sk), net)) 1867 if (!net_eq(sock_net(sk), net))
@@ -1647,7 +1886,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
1647 } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); 1886 } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
1648 1887
1649 if (!sk) { 1888 if (!sk) {
1650 if (state->bucket < UDP_HTABLE_SIZE) 1889 if (state->bucket <= state->udp_table->mask)
1651 spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); 1890 spin_unlock_bh(&state->udp_table->hash[state->bucket].lock);
1652 return udp_get_first(seq, state->bucket + 1); 1891 return udp_get_first(seq, state->bucket + 1);
1653 } 1892 }
@@ -1667,7 +1906,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
1667static void *udp_seq_start(struct seq_file *seq, loff_t *pos) 1906static void *udp_seq_start(struct seq_file *seq, loff_t *pos)
1668{ 1907{
1669 struct udp_iter_state *state = seq->private; 1908 struct udp_iter_state *state = seq->private;
1670 state->bucket = UDP_HTABLE_SIZE; 1909 state->bucket = MAX_UDP_PORTS;
1671 1910
1672 return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; 1911 return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
1673} 1912}
@@ -1689,7 +1928,7 @@ static void udp_seq_stop(struct seq_file *seq, void *v)
1689{ 1928{
1690 struct udp_iter_state *state = seq->private; 1929 struct udp_iter_state *state = seq->private;
1691 1930
1692 if (state->bucket < UDP_HTABLE_SIZE) 1931 if (state->bucket <= state->udp_table->mask)
1693 spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); 1932 spin_unlock_bh(&state->udp_table->hash[state->bucket].lock);
1694} 1933}
1695 1934
@@ -1744,12 +1983,12 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,
1744 int bucket, int *len) 1983 int bucket, int *len)
1745{ 1984{
1746 struct inet_sock *inet = inet_sk(sp); 1985 struct inet_sock *inet = inet_sk(sp);
1747 __be32 dest = inet->daddr; 1986 __be32 dest = inet->inet_daddr;
1748 __be32 src = inet->rcv_saddr; 1987 __be32 src = inet->inet_rcv_saddr;
1749 __u16 destp = ntohs(inet->dport); 1988 __u16 destp = ntohs(inet->inet_dport);
1750 __u16 srcp = ntohs(inet->sport); 1989 __u16 srcp = ntohs(inet->inet_sport);
1751 1990
1752 seq_printf(f, "%4d: %08X:%04X %08X:%04X" 1991 seq_printf(f, "%5d: %08X:%04X %08X:%04X"
1753 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n", 1992 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n",
1754 bucket, src, srcp, dest, destp, sp->sk_state, 1993 bucket, src, srcp, dest, destp, sp->sk_state,
1755 sk_wmem_alloc_get(sp), 1994 sk_wmem_alloc_get(sp),
@@ -1815,21 +2054,60 @@ void udp4_proc_exit(void)
1815} 2054}
1816#endif /* CONFIG_PROC_FS */ 2055#endif /* CONFIG_PROC_FS */
1817 2056
1818void __init udp_table_init(struct udp_table *table) 2057static __initdata unsigned long uhash_entries;
2058static int __init set_uhash_entries(char *str)
1819{ 2059{
1820 int i; 2060 if (!str)
2061 return 0;
2062 uhash_entries = simple_strtoul(str, &str, 0);
2063 if (uhash_entries && uhash_entries < UDP_HTABLE_SIZE_MIN)
2064 uhash_entries = UDP_HTABLE_SIZE_MIN;
2065 return 1;
2066}
2067__setup("uhash_entries=", set_uhash_entries);
1821 2068
1822 for (i = 0; i < UDP_HTABLE_SIZE; i++) { 2069void __init udp_table_init(struct udp_table *table, const char *name)
2070{
2071 unsigned int i;
2072
2073 if (!CONFIG_BASE_SMALL)
2074 table->hash = alloc_large_system_hash(name,
2075 2 * sizeof(struct udp_hslot),
2076 uhash_entries,
2077 21, /* one slot per 2 MB */
2078 0,
2079 &table->log,
2080 &table->mask,
2081 64 * 1024);
2082 /*
2083 * Make sure hash table has the minimum size
2084 */
2085 if (CONFIG_BASE_SMALL || table->mask < UDP_HTABLE_SIZE_MIN - 1) {
2086 table->hash = kmalloc(UDP_HTABLE_SIZE_MIN *
2087 2 * sizeof(struct udp_hslot), GFP_KERNEL);
2088 if (!table->hash)
2089 panic(name);
2090 table->log = ilog2(UDP_HTABLE_SIZE_MIN);
2091 table->mask = UDP_HTABLE_SIZE_MIN - 1;
2092 }
2093 table->hash2 = table->hash + (table->mask + 1);
2094 for (i = 0; i <= table->mask; i++) {
1823 INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i); 2095 INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i);
2096 table->hash[i].count = 0;
1824 spin_lock_init(&table->hash[i].lock); 2097 spin_lock_init(&table->hash[i].lock);
1825 } 2098 }
2099 for (i = 0; i <= table->mask; i++) {
2100 INIT_HLIST_NULLS_HEAD(&table->hash2[i].head, i);
2101 table->hash2[i].count = 0;
2102 spin_lock_init(&table->hash2[i].lock);
2103 }
1826} 2104}
1827 2105
1828void __init udp_init(void) 2106void __init udp_init(void)
1829{ 2107{
1830 unsigned long nr_pages, limit; 2108 unsigned long nr_pages, limit;
1831 2109
1832 udp_table_init(&udp_table); 2110 udp_table_init(&udp_table, "UDP");
1833 /* Set the pressure threshold up by the same strategy of TCP. It is a 2111 /* Set the pressure threshold up by the same strategy of TCP. It is a
1834 * fraction of global memory that is up to 1/2 at 256 MB, decreasing 2112 * fraction of global memory that is up to 1/2 at 256 MB, decreasing
1835 * toward zero with the amount of memory, with a floor of 128 pages. 2113 * toward zero with the amount of memory, with a floor of 128 pages.
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 95248d7f75ec..66f79513f4a5 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -12,7 +12,7 @@
12 */ 12 */
13#include "udp_impl.h" 13#include "udp_impl.h"
14 14
15struct udp_table udplite_table; 15struct udp_table udplite_table __read_mostly;
16EXPORT_SYMBOL(udplite_table); 16EXPORT_SYMBOL(udplite_table);
17 17
18static int udplite_rcv(struct sk_buff *skb) 18static int udplite_rcv(struct sk_buff *skb)
@@ -64,7 +64,6 @@ static struct inet_protosw udplite4_protosw = {
64 .protocol = IPPROTO_UDPLITE, 64 .protocol = IPPROTO_UDPLITE,
65 .prot = &udplite_prot, 65 .prot = &udplite_prot,
66 .ops = &inet_dgram_ops, 66 .ops = &inet_dgram_ops,
67 .capability = -1,
68 .no_check = 0, /* must checksum (RFC 3828) */ 67 .no_check = 0, /* must checksum (RFC 3828) */
69 .flags = INET_PROTOSW_PERMANENT, 68 .flags = INET_PROTOSW_PERMANENT,
70}; 69};
@@ -110,7 +109,7 @@ static inline int udplite4_proc_init(void)
110 109
111void __init udplite4_register(void) 110void __init udplite4_register(void)
112{ 111{
113 udp_table_init(&udplite_table); 112 udp_table_init(&udplite_table, "UDP-Lite");
114 if (proto_register(&udplite_prot, 1)) 113 if (proto_register(&udplite_prot, 1))
115 goto out_register_err; 114 goto out_register_err;
116 115