Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/af_inet.c | 74
-rw-r--r--  net/ipv4/ah4.c | 295
-rw-r--r--  net/ipv4/cipso_ipv4.c | 2
-rw-r--r--  net/ipv4/datagram.c | 18
-rw-r--r--  net/ipv4/devinet.c | 96
-rw-r--r--  net/ipv4/fib_frontend.c | 26
-rw-r--r--  net/ipv4/fib_hash.c | 25
-rw-r--r--  net/ipv4/fib_rules.c | 2
-rw-r--r--  net/ipv4/fib_trie.c | 26
-rw-r--r--  net/ipv4/icmp.c | 13
-rw-r--r--  net/ipv4/inet_connection_sock.c | 21
-rw-r--r--  net/ipv4/inet_diag.c | 28
-rw-r--r--  net/ipv4/inet_hashtables.c | 36
-rw-r--r--  net/ipv4/inet_timewait_sock.c | 14
-rw-r--r--  net/ipv4/ip_fragment.c | 7
-rw-r--r--  net/ipv4/ip_gre.c | 62
-rw-r--r--  net/ipv4/ip_input.c | 2
-rw-r--r--  net/ipv4/ip_output.c | 15
-rw-r--r--  net/ipv4/ip_sockglue.c | 12
-rw-r--r--  net/ipv4/ipip.c | 71
-rw-r--r--  net/ipv4/ipmr.c | 29
-rw-r--r--  net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 8
-rw-r--r--  net/ipv4/raw.c | 33
-rw-r--r--  net/ipv4/route.c | 3
-rw-r--r--  net/ipv4/syncookies.c | 30
-rw-r--r--  net/ipv4/tcp.c | 15
-rw-r--r--  net/ipv4/tcp_input.c | 28
-rw-r--r--  net/ipv4/tcp_ipv4.c | 97
-rw-r--r--  net/ipv4/tcp_minisocks.c | 10
-rw-r--r--  net/ipv4/tcp_output.c | 22
-rw-r--r--  net/ipv4/tcp_probe.c | 11
-rw-r--r--  net/ipv4/tcp_timer.c | 12
-rw-r--r--  net/ipv4/udp.c | 409
-rw-r--r--  net/ipv4/udplite.c | 5
34 files changed, 1004 insertions, 553 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 57737b8d1711..7d12c6a9b19b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -174,12 +174,12 @@ static int inet_autobind(struct sock *sk)
 	/* We may need to bind the socket. */
 	lock_sock(sk);
 	inet = inet_sk(sk);
-	if (!inet->num) {
+	if (!inet->inet_num) {
 		if (sk->sk_prot->get_port(sk, 0)) {
 			release_sock(sk);
 			return -EAGAIN;
 		}
-		inet->sport = htons(inet->num);
+		inet->inet_sport = htons(inet->inet_num);
 	}
 	release_sock(sk);
 	return 0;
@@ -262,7 +262,8 @@ static inline int inet_netns_ok(struct net *net, int protocol)
  *	Create an inet socket.
  */

-static int inet_create(struct net *net, struct socket *sock, int protocol)
+static int inet_create(struct net *net, struct socket *sock, int protocol,
+		       int kern)
 {
 	struct sock *sk;
 	struct inet_protosw *answer;
@@ -325,7 +326,7 @@ lookup_protocol:
 	}

 	err = -EPERM;
-	if (answer->capability > 0 && !capable(answer->capability))
+	if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
 		goto out_rcu_unlock;

 	err = -EAFNOSUPPORT;
@@ -354,7 +355,7 @@ lookup_protocol:
 	inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;

 	if (SOCK_RAW == sock->type) {
-		inet->num = protocol;
+		inet->inet_num = protocol;
 		if (IPPROTO_RAW == protocol)
 			inet->hdrincl = 1;
 	}
@@ -364,7 +365,7 @@ lookup_protocol:
 	else
 		inet->pmtudisc = IP_PMTUDISC_WANT;

-	inet->id = 0;
+	inet->inet_id = 0;

 	sock_init_data(sock, sk);

@@ -381,13 +382,13 @@ lookup_protocol:
 
 	sk_refcnt_debug_inc(sk);

-	if (inet->num) {
+	if (inet->inet_num) {
 		/* It assumes that any protocol which allows
 		 * the user to assign a number at socket
 		 * creation time automatically
 		 * shares.
 		 */
-		inet->sport = htons(inet->num);
+		inet->inet_sport = htons(inet->inet_num);
 		/* Add to protocol hash chains. */
 		sk->sk_prot->hash(sk);
 	}
@@ -494,27 +495,27 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 
 	/* Check these errors (active socket, double bind). */
 	err = -EINVAL;
-	if (sk->sk_state != TCP_CLOSE || inet->num)
+	if (sk->sk_state != TCP_CLOSE || inet->inet_num)
 		goto out_release_sock;

-	inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr;
+	inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;
 	if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
-		inet->saddr = 0;  /* Use device */
+		inet->inet_saddr = 0;  /* Use device */

 	/* Make sure we are allowed to bind here. */
 	if (sk->sk_prot->get_port(sk, snum)) {
-		inet->saddr = inet->rcv_saddr = 0;
+		inet->inet_saddr = inet->inet_rcv_saddr = 0;
 		err = -EADDRINUSE;
 		goto out_release_sock;
 	}

-	if (inet->rcv_saddr)
+	if (inet->inet_rcv_saddr)
 		sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
 	if (snum)
 		sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
-	inet->sport = htons(inet->num);
-	inet->daddr = 0;
-	inet->dport = 0;
+	inet->inet_sport = htons(inet->inet_num);
+	inet->inet_daddr = 0;
+	inet->inet_dport = 0;
 	sk_dst_reset(sk);
 	err = 0;
 out_release_sock:
@@ -532,7 +533,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
 	if (uaddr->sa_family == AF_UNSPEC)
 		return sk->sk_prot->disconnect(sk, flags);

-	if (!inet_sk(sk)->num && inet_autobind(sk))
+	if (!inet_sk(sk)->inet_num && inet_autobind(sk))
 		return -EAGAIN;
 	return sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len);
 }
@@ -685,21 +686,21 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
 {
 	struct sock *sk = sock->sk;
 	struct inet_sock *inet = inet_sk(sk);
-	struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
+	DECLARE_SOCKADDR(struct sockaddr_in *, sin, uaddr);

 	sin->sin_family = AF_INET;
 	if (peer) {
-		if (!inet->dport ||
+		if (!inet->inet_dport ||
 		    (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
 		     peer == 1))
 			return -ENOTCONN;
-		sin->sin_port = inet->dport;
-		sin->sin_addr.s_addr = inet->daddr;
+		sin->sin_port = inet->inet_dport;
+		sin->sin_addr.s_addr = inet->inet_daddr;
 	} else {
-		__be32 addr = inet->rcv_saddr;
+		__be32 addr = inet->inet_rcv_saddr;
 		if (!addr)
-			addr = inet->saddr;
-		sin->sin_port = inet->sport;
+			addr = inet->inet_saddr;
+		sin->sin_port = inet->inet_sport;
 		sin->sin_addr.s_addr = addr;
 	}
 	memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
@@ -714,7 +715,7 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 	struct sock *sk = sock->sk;

 	/* We may need to bind the socket. */
-	if (!inet_sk(sk)->num && inet_autobind(sk))
+	if (!inet_sk(sk)->inet_num && inet_autobind(sk))
 		return -EAGAIN;

 	return sk->sk_prot->sendmsg(iocb, sk, msg, size);
@@ -728,7 +729,7 @@ static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
 	struct sock *sk = sock->sk;

 	/* We may need to bind the socket. */
-	if (!inet_sk(sk)->num && inet_autobind(sk))
+	if (!inet_sk(sk)->inet_num && inet_autobind(sk))
 		return -EAGAIN;

 	if (sk->sk_prot->sendpage)
@@ -931,7 +932,7 @@ static const struct proto_ops inet_sockraw_ops = {
 #endif
 };

-static struct net_proto_family inet_family_ops = {
+static const struct net_proto_family inet_family_ops = {
 	.family = PF_INET,
 	.create = inet_create,
 	.owner	= THIS_MODULE,
@@ -947,7 +948,6 @@ static struct inet_protosw inetsw_array[] =
 		.protocol =   IPPROTO_TCP,
 		.prot =       &tcp_prot,
 		.ops =        &inet_stream_ops,
-		.capability = -1,
 		.no_check =   0,
 		.flags =      INET_PROTOSW_PERMANENT |
 			      INET_PROTOSW_ICSK,
@@ -958,7 +958,6 @@ static struct inet_protosw inetsw_array[] =
 		.protocol =   IPPROTO_UDP,
 		.prot =       &udp_prot,
 		.ops =        &inet_dgram_ops,
-		.capability = -1,
 		.no_check =   UDP_CSUM_DEFAULT,
 		.flags =      INET_PROTOSW_PERMANENT,
 	},
@@ -969,7 +968,6 @@ static struct inet_protosw inetsw_array[] =
 		.protocol =   IPPROTO_IP,	/* wild card */
 		.prot =       &raw_prot,
 		.ops =        &inet_sockraw_ops,
-		.capability = CAP_NET_RAW,
 		.no_check =   UDP_CSUM_DEFAULT,
 		.flags =      INET_PROTOSW_REUSE,
 	}
@@ -1059,9 +1057,9 @@ static int inet_sk_reselect_saddr(struct sock *sk)
 	struct inet_sock *inet = inet_sk(sk);
 	int err;
 	struct rtable *rt;
-	__be32 old_saddr = inet->saddr;
+	__be32 old_saddr = inet->inet_saddr;
 	__be32 new_saddr;
-	__be32 daddr = inet->daddr;
+	__be32 daddr = inet->inet_daddr;

 	if (inet->opt && inet->opt->srr)
 		daddr = inet->opt->faddr;
@@ -1071,7 +1069,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
 			       RT_CONN_FLAGS(sk),
 			       sk->sk_bound_dev_if,
 			       sk->sk_protocol,
-			       inet->sport, inet->dport, sk, 0);
+			       inet->inet_sport, inet->inet_dport, sk, 0);
 	if (err)
 		return err;

@@ -1087,7 +1085,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
 		       __func__, &old_saddr, &new_saddr);
 	}

-	inet->saddr = inet->rcv_saddr = new_saddr;
+	inet->inet_saddr = inet->inet_rcv_saddr = new_saddr;

 	/*
 	 * XXX The only one ugly spot where we need to
@@ -1113,7 +1111,7 @@ int inet_sk_rebuild_header(struct sock *sk)
 		return 0;

 	/* Reroute. */
-	daddr = inet->daddr;
+	daddr = inet->inet_daddr;
 	if (inet->opt && inet->opt->srr)
 		daddr = inet->opt->faddr;
 {
@@ -1123,7 +1121,7 @@ int inet_sk_rebuild_header(struct sock *sk)
 		.nl_u = {
 			.ip4_u = {
 				.daddr	= daddr,
-				.saddr	= inet->saddr,
+				.saddr	= inet->inet_saddr,
 				.tos	= RT_CONN_FLAGS(sk),
 			},
 		},
@@ -1131,8 +1129,8 @@ int inet_sk_rebuild_header(struct sock *sk)
 		.flags = inet_sk_flowi_flags(sk),
 		.uli_u = {
 			.ports = {
-				.sport = inet->sport,
-				.dport = inet->dport,
+				.sport = inet->inet_sport,
+				.dport = inet->inet_dport,
 			},
 		},
 	};
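
Note: the af_inet.c hunks above do two independent things — they rename the inet_sock fields (num/sport/daddr/... to inet_num/inet_sport/inet_daddr/...), and they replace the per-protosw ->capability field with a single explicit gate in inet_create(). A minimal userspace C sketch of the two permission models follows; the capable() stub and constants are illustrative stand-ins, not kernel code.

#include <stdbool.h>
#include <stdio.h>

#define SOCK_RAW    3
#define CAP_NET_RAW 13

/* Stand-in for the kernel's credential check. */
static bool capable(int cap)
{
	(void)cap;
	return false;	/* pretend the caller is unprivileged */
}

/* Old model: each inetsw entry carried a capability, -1 meaning none. */
static int inet_create_old(int entry_capability)
{
	if (entry_capability > 0 && !capable(entry_capability))
		return -1;	/* -EPERM */
	return 0;
}

/* New model: only SOCK_RAW is gated, and kernel-internal creations
 * (kern != 0) skip the check entirely. */
static int inet_create_new(int sock_type, int kern)
{
	if (sock_type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
		return -1;	/* -EPERM */
	return 0;
}

int main(void)
{
	printf("old raw: %d, new raw: %d, new raw (kern): %d\n",
	       inet_create_old(CAP_NET_RAW),
	       inet_create_new(SOCK_RAW, 0),
	       inet_create_new(SOCK_RAW, 1));
	return 0;
}

With the field gone from struct inet_protosw, the three inetsw_array entries simply drop their .capability initializers, as the hunks above show.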
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 5c662703eb1e..d07b0c1dd350 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -1,3 +1,4 @@
+#include <crypto/hash.h>
 #include <linux/err.h>
 #include <linux/module.h>
 #include <net/ip.h>
@@ -5,10 +6,67 @@
 #include <net/ah.h>
 #include <linux/crypto.h>
 #include <linux/pfkeyv2.h>
-#include <linux/spinlock.h>
+#include <linux/scatterlist.h>
 #include <net/icmp.h>
 #include <net/protocol.h>

+struct ah_skb_cb {
+	struct xfrm_skb_cb xfrm;
+	void *tmp;
+};
+
+#define AH_SKB_CB(__skb) ((struct ah_skb_cb *)&((__skb)->cb[0]))
+
+static void *ah_alloc_tmp(struct crypto_ahash *ahash, int nfrags,
+			  unsigned int size)
+{
+	unsigned int len;
+
+	len = size + crypto_ahash_digestsize(ahash) +
+	      (crypto_ahash_alignmask(ahash) &
+	       ~(crypto_tfm_ctx_alignment() - 1));
+
+	len = ALIGN(len, crypto_tfm_ctx_alignment());
+
+	len += sizeof(struct ahash_request) + crypto_ahash_reqsize(ahash);
+	len = ALIGN(len, __alignof__(struct scatterlist));
+
+	len += sizeof(struct scatterlist) * nfrags;
+
+	return kmalloc(len, GFP_ATOMIC);
+}
+
+static inline u8 *ah_tmp_auth(void *tmp, unsigned int offset)
+{
+	return tmp + offset;
+}
+
+static inline u8 *ah_tmp_icv(struct crypto_ahash *ahash, void *tmp,
+			     unsigned int offset)
+{
+	return PTR_ALIGN((u8 *)tmp + offset, crypto_ahash_alignmask(ahash) + 1);
+}
+
+static inline struct ahash_request *ah_tmp_req(struct crypto_ahash *ahash,
+					       u8 *icv)
+{
+	struct ahash_request *req;
+
+	req = (void *)PTR_ALIGN(icv + crypto_ahash_digestsize(ahash),
+				crypto_tfm_ctx_alignment());
+
+	ahash_request_set_tfm(req, ahash);
+
+	return req;
+}
+
+static inline struct scatterlist *ah_req_sg(struct crypto_ahash *ahash,
+					    struct ahash_request *req)
+{
+	return (void *)ALIGN((unsigned long)(req + 1) +
+			     crypto_ahash_reqsize(ahash),
+			     __alignof__(struct scatterlist));
+}
 
 /* Clear mutable options and find final destination to substitute
  * into IP header for icv calculation. Options are already checked
@@ -54,20 +112,72 @@ static int ip_clear_mutable_options(struct iphdr *iph, __be32 *daddr)
 	return 0;
 }

+static void ah_output_done(struct crypto_async_request *base, int err)
+{
+	u8 *icv;
+	struct iphdr *iph;
+	struct sk_buff *skb = base->data;
+	struct xfrm_state *x = skb_dst(skb)->xfrm;
+	struct ah_data *ahp = x->data;
+	struct iphdr *top_iph = ip_hdr(skb);
+	struct ip_auth_hdr *ah = ip_auth_hdr(skb);
+	int ihl = ip_hdrlen(skb);
+
+	iph = AH_SKB_CB(skb)->tmp;
+	icv = ah_tmp_icv(ahp->ahash, iph, ihl);
+	memcpy(ah->auth_data, icv, ahp->icv_trunc_len);
+
+	top_iph->tos = iph->tos;
+	top_iph->ttl = iph->ttl;
+	top_iph->frag_off = iph->frag_off;
+	if (top_iph->ihl != 5) {
+		top_iph->daddr = iph->daddr;
+		memcpy(top_iph+1, iph+1, top_iph->ihl*4 - sizeof(struct iphdr));
+	}
+
+	err = ah->nexthdr;
+
+	kfree(AH_SKB_CB(skb)->tmp);
+	xfrm_output_resume(skb, err);
+}
+
 static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int err;
+	int nfrags;
+	int ihl;
+	u8 *icv;
+	struct sk_buff *trailer;
+	struct crypto_ahash *ahash;
+	struct ahash_request *req;
+	struct scatterlist *sg;
 	struct iphdr *iph, *top_iph;
 	struct ip_auth_hdr *ah;
 	struct ah_data *ahp;
-	union {
-		struct iphdr iph;
-		char buf[60];
-	} tmp_iph;
+
+	ahp = x->data;
+	ahash = ahp->ahash;
+
+	if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
+		goto out;
+	nfrags = err;
 
 	skb_push(skb, -skb_network_offset(skb));
+	ah = ip_auth_hdr(skb);
+	ihl = ip_hdrlen(skb);
+
+	err = -ENOMEM;
+	iph = ah_alloc_tmp(ahash, nfrags, ihl);
+	if (!iph)
+		goto out;
+
+	icv = ah_tmp_icv(ahash, iph, ihl);
+	req = ah_tmp_req(ahash, icv);
+	sg = ah_req_sg(ahash, req);
+
+	memset(ah->auth_data, 0, ahp->icv_trunc_len);
+
 	top_iph = ip_hdr(skb);
-	iph = &tmp_iph.iph;
 
 	iph->tos = top_iph->tos;
 	iph->ttl = top_iph->ttl;
@@ -78,10 +188,9 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
 		memcpy(iph+1, top_iph+1, top_iph->ihl*4 - sizeof(struct iphdr));
 		err = ip_clear_mutable_options(top_iph, &top_iph->daddr);
 		if (err)
-			goto error;
+			goto out_free;
 	}

-	ah = ip_auth_hdr(skb);
 	ah->nexthdr = *skb_mac_header(skb);
 	*skb_mac_header(skb) = IPPROTO_AH;

@@ -91,20 +200,31 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
 	top_iph->ttl = 0;
 	top_iph->check = 0;

-	ahp = x->data;
 	ah->hdrlen  = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2;

 	ah->reserved = 0;
 	ah->spi = x->id.spi;
 	ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output);

-	spin_lock_bh(&x->lock);
-	err = ah_mac_digest(ahp, skb, ah->auth_data);
-	memcpy(ah->auth_data, ahp->work_icv, ahp->icv_trunc_len);
-	spin_unlock_bh(&x->lock);
+	sg_init_table(sg, nfrags);
+	skb_to_sgvec(skb, sg, 0, skb->len);
 
-	if (err)
-		goto error;
+	ahash_request_set_crypt(req, sg, icv, skb->len);
+	ahash_request_set_callback(req, 0, ah_output_done, skb);
+
+	AH_SKB_CB(skb)->tmp = iph;
+
+	err = crypto_ahash_digest(req);
+	if (err) {
+		if (err == -EINPROGRESS)
+			goto out;
+
+		if (err == -EBUSY)
+			err = NET_XMIT_DROP;
+		goto out_free;
+	}
+
+	memcpy(ah->auth_data, icv, ahp->icv_trunc_len);
 
 	top_iph->tos = iph->tos;
 	top_iph->ttl = iph->ttl;
@@ -114,28 +234,67 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
 		memcpy(top_iph+1, iph+1, top_iph->ihl*4 - sizeof(struct iphdr));
 	}

-	err = 0;
-
-error:
+out_free:
+	kfree(iph);
+out:
 	return err;
 }

+static void ah_input_done(struct crypto_async_request *base, int err)
+{
+	u8 *auth_data;
+	u8 *icv;
+	struct iphdr *work_iph;
+	struct sk_buff *skb = base->data;
+	struct xfrm_state *x = xfrm_input_state(skb);
+	struct ah_data *ahp = x->data;
+	struct ip_auth_hdr *ah = ip_auth_hdr(skb);
+	int ihl = ip_hdrlen(skb);
+	int ah_hlen = (ah->hdrlen + 2) << 2;
+
+	work_iph = AH_SKB_CB(skb)->tmp;
+	auth_data = ah_tmp_auth(work_iph, ihl);
+	icv = ah_tmp_icv(ahp->ahash, auth_data, ahp->icv_trunc_len);
+
+	err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG: 0;
+	if (err)
+		goto out;
+
+	skb->network_header += ah_hlen;
+	memcpy(skb_network_header(skb), work_iph, ihl);
+	__skb_pull(skb, ah_hlen + ihl);
+	skb_set_transport_header(skb, -ihl);
+
+	err = ah->nexthdr;
+out:
+	kfree(AH_SKB_CB(skb)->tmp);
+	xfrm_input_resume(skb, err);
+}
+
 static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int ah_hlen;
 	int ihl;
 	int nexthdr;
-	int err = -EINVAL;
-	struct iphdr *iph;
+	int nfrags;
+	u8 *auth_data;
+	u8 *icv;
+	struct sk_buff *trailer;
+	struct crypto_ahash *ahash;
+	struct ahash_request *req;
+	struct scatterlist *sg;
+	struct iphdr *iph, *work_iph;
 	struct ip_auth_hdr *ah;
 	struct ah_data *ahp;
-	char work_buf[60];
+	int err = -ENOMEM;

 	if (!pskb_may_pull(skb, sizeof(*ah)))
 		goto out;

 	ah = (struct ip_auth_hdr *)skb->data;
 	ahp = x->data;
+	ahash = ahp->ahash;
+
 	nexthdr = ah->nexthdr;
 	ah_hlen = (ah->hdrlen + 2) << 2;

@@ -156,9 +315,24 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	ah = (struct ip_auth_hdr *)skb->data;
 	iph = ip_hdr(skb);
+	ihl = ip_hdrlen(skb);
+
+	if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
+		goto out;
+	nfrags = err;
+
+	work_iph = ah_alloc_tmp(ahash, nfrags, ihl + ahp->icv_trunc_len);
+	if (!work_iph)
+		goto out;
+
+	auth_data = ah_tmp_auth(work_iph, ihl);
+	icv = ah_tmp_icv(ahash, auth_data, ahp->icv_trunc_len);
+	req = ah_tmp_req(ahash, icv);
+	sg = ah_req_sg(ahash, req);
 
-	ihl = skb->data - skb_network_header(skb);
-	memcpy(work_buf, iph, ihl);
+	memcpy(work_iph, iph, ihl);
+	memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
+	memset(ah->auth_data, 0, ahp->icv_trunc_len);
 
 	iph->ttl = 0;
 	iph->tos = 0;
@@ -166,35 +340,44 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 	iph->check = 0;
 	if (ihl > sizeof(*iph)) {
 		__be32 dummy;
-		if (ip_clear_mutable_options(iph, &dummy))
-			goto out;
+		err = ip_clear_mutable_options(iph, &dummy);
+		if (err)
+			goto out_free;
 	}

-	spin_lock(&x->lock);
-	{
-		u8 auth_data[MAX_AH_AUTH_LEN];
+	skb_push(skb, ihl);
 
-		memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
-		skb_push(skb, ihl);
-		err = ah_mac_digest(ahp, skb, ah->auth_data);
-		if (err)
-			goto unlock;
-		if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len))
-			err = -EBADMSG;
+	sg_init_table(sg, nfrags);
+	skb_to_sgvec(skb, sg, 0, skb->len);
+
+	ahash_request_set_crypt(req, sg, icv, skb->len);
+	ahash_request_set_callback(req, 0, ah_input_done, skb);
+
+	AH_SKB_CB(skb)->tmp = work_iph;
+
+	err = crypto_ahash_digest(req);
+	if (err) {
+		if (err == -EINPROGRESS)
+			goto out;
+
+		if (err == -EBUSY)
+			err = NET_XMIT_DROP;
+		goto out_free;
 	}
-unlock:
-	spin_unlock(&x->lock);

+	err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG: 0;
 	if (err)
-		goto out;
+		goto out_free;

 	skb->network_header += ah_hlen;
-	memcpy(skb_network_header(skb), work_buf, ihl);
-	skb->transport_header = skb->network_header;
+	memcpy(skb_network_header(skb), work_iph, ihl);
 	__skb_pull(skb, ah_hlen + ihl);
+	skb_set_transport_header(skb, -ihl);

-	return nexthdr;
+	err = nexthdr;
 
+out_free:
+	kfree (work_iph);
 out:
 	return err;
 }
@@ -222,7 +405,7 @@ static int ah_init_state(struct xfrm_state *x)
 {
 	struct ah_data *ahp = NULL;
 	struct xfrm_algo_desc *aalg_desc;
-	struct crypto_hash *tfm;
+	struct crypto_ahash *ahash;

 	if (!x->aalg)
 		goto error;
@@ -231,31 +414,31 @@ static int ah_init_state(struct xfrm_state *x)
 		goto error;

 	ahp = kzalloc(sizeof(*ahp), GFP_KERNEL);
-	if (ahp == NULL)
+	if (!ahp)
 		return -ENOMEM;

-	tfm = crypto_alloc_hash(x->aalg->alg_name, 0, CRYPTO_ALG_ASYNC);
-	if (IS_ERR(tfm))
+	ahash = crypto_alloc_ahash(x->aalg->alg_name, 0, 0);
+	if (IS_ERR(ahash))
 		goto error;

-	ahp->tfm = tfm;
-	if (crypto_hash_setkey(tfm, x->aalg->alg_key,
-			       (x->aalg->alg_key_len + 7) / 8))
+	ahp->ahash = ahash;
+	if (crypto_ahash_setkey(ahash, x->aalg->alg_key,
+				(x->aalg->alg_key_len + 7) / 8))
 		goto error;

 	/*
 	 * Lookup the algorithm description maintained by xfrm_algo,
 	 * verify crypto transform properties, and store information
 	 * we need for AH processing.  This lookup cannot fail here
-	 * after a successful crypto_alloc_hash().
+	 * after a successful crypto_alloc_ahash().
 	 */
 	aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
 	BUG_ON(!aalg_desc);

 	if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
-	    crypto_hash_digestsize(tfm)) {
+	    crypto_ahash_digestsize(ahash)) {
 		printk(KERN_INFO "AH: %s digestsize %u != %hu\n",
-		       x->aalg->alg_name, crypto_hash_digestsize(tfm),
+		       x->aalg->alg_name, crypto_ahash_digestsize(ahash),
 		       aalg_desc->uinfo.auth.icv_fullbits/8);
 		goto error;
 	}
@@ -265,10 +448,6 @@ static int ah_init_state(struct xfrm_state *x)
 
 	BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN);

-	ahp->work_icv = kmalloc(ahp->icv_full_len, GFP_KERNEL);
-	if (!ahp->work_icv)
-		goto error;
-
 	x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) +
 					  ahp->icv_trunc_len);
 	if (x->props.mode == XFRM_MODE_TUNNEL)
@@ -279,8 +458,7 @@ static int ah_init_state(struct xfrm_state *x)
 
 error:
 	if (ahp) {
-		kfree(ahp->work_icv);
-		crypto_free_hash(ahp->tfm);
+		crypto_free_ahash(ahp->ahash);
 		kfree(ahp);
 	}
 	return -EINVAL;
@@ -293,8 +471,7 @@ static void ah_destroy(struct xfrm_state *x)
 	if (!ahp)
 		return;

-	kfree(ahp->work_icv);
-	crypto_free_hash(ahp->tfm);
+	crypto_free_ahash(ahp->ahash);
 	kfree(ahp);
 }

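
Note: the ah4.c conversion above drops the synchronous hash done under x->lock (with a per-state work_icv buffer) in favour of the async ahash API: each packet gets one kmalloc'd scratch buffer that ah_alloc_tmp() carves into the saved IP header, the ICV, the ahash request, and the scatterlist. A standalone C sketch of that offset arithmetic follows; the sizes are made-up stand-ins for the crypto_ahash_* accessors, so treat it as an approximation of the layout, not the kernel's exact numbers.

#include <stdio.h>

#define ALIGN(x, a) (((x) + (a) - 1) & ~((size_t)(a) - 1))

/* Made-up stand-ins for the accessors used in the diff. */
enum {
	DIGEST_SIZE = 20,  /* crypto_ahash_digestsize(), e.g. hmac(sha1) */
	CTX_ALIGN   = 8,   /* crypto_tfm_ctx_alignment() */
	REQ_HDR     = 48,  /* sizeof(struct ahash_request) */
	REQ_SIZE    = 64,  /* crypto_ahash_reqsize() */
	SG_ALIGN    = 8,   /* __alignof__(struct scatterlist) */
	SG_SIZE     = 32,  /* sizeof(struct scatterlist) */
};

int main(void)
{
	size_t ihl = 20, nfrags = 2;	/* saved-header size, sg entries */
	size_t icv_off, req_off, sg_off, total;

	icv_off = ihl;			/* ICV follows the header copy */
	req_off = ALIGN(icv_off + DIGEST_SIZE, CTX_ALIGN);
	sg_off  = ALIGN(req_off + REQ_HDR + REQ_SIZE, SG_ALIGN);
	total   = sg_off + SG_SIZE * nfrags;

	printf("icv@%zu req@%zu sg@%zu total=%zu bytes\n",
	       icv_off, req_off, sg_off, total);
	return 0;
}

Packing everything into one atomic allocation, with the buffer pointer stashed in skb->cb, is what lets the digest complete asynchronously: ah_output_done()/ah_input_done() can recover every piece from AH_SKB_CB(skb)->tmp and then free it in one kfree().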
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 039cc1ffe977..1e029dc75455 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -2017,7 +2017,7 @@ req_setattr_failure:
  * values on failure.
  *
  */
-int cipso_v4_delopt(struct ip_options **opt_ptr)
+static int cipso_v4_delopt(struct ip_options **opt_ptr)
 {
 	int hdr_delta = 0;
 	struct ip_options *opt = *opt_ptr;
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 5e6c5a0f3fde..fb2465811b48 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -39,7 +39,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	sk_dst_reset(sk);

 	oif = sk->sk_bound_dev_if;
-	saddr = inet->saddr;
+	saddr = inet->inet_saddr;
 	if (ipv4_is_multicast(usin->sin_addr.s_addr)) {
 		if (!oif)
 			oif = inet->mc_index;
@@ -49,7 +49,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	err = ip_route_connect(&rt, usin->sin_addr.s_addr, saddr,
 			       RT_CONN_FLAGS(sk), oif,
 			       sk->sk_protocol,
-			       inet->sport, usin->sin_port, sk, 1);
+			       inet->inet_sport, usin->sin_port, sk, 1);
 	if (err) {
 		if (err == -ENETUNREACH)
 			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
@@ -60,14 +60,14 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		ip_rt_put(rt);
 		return -EACCES;
 	}
-	if (!inet->saddr)
-		inet->saddr = rt->rt_src;	/* Update source address */
-	if (!inet->rcv_saddr)
-		inet->rcv_saddr = rt->rt_src;
-	inet->daddr = rt->rt_dst;
-	inet->dport = usin->sin_port;
+	if (!inet->inet_saddr)
+		inet->inet_saddr = rt->rt_src;	/* Update source address */
+	if (!inet->inet_rcv_saddr)
+		inet->inet_rcv_saddr = rt->rt_src;
+	inet->inet_daddr = rt->rt_dst;
+	inet->inet_dport = usin->sin_port;
 	sk->sk_state = TCP_ESTABLISHED;
-	inet->id = jiffies;
+	inet->inet_id = jiffies;

 	sk_dst_set(sk, &rt->u.dst);
 	return(0);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 5df2f6a0b0f0..c2045f9615da 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -140,11 +140,11 @@ void in_dev_finish_destroy(struct in_device *idev)
 #endif
 	dev_put(dev);
 	if (!idev->dead)
-		printk("Freeing alive in_device %p\n", idev);
-	else {
+		pr_err("Freeing alive in_device %p\n", idev);
+	else
 		kfree(idev);
-	}
 }
+EXPORT_SYMBOL(in_dev_finish_destroy);

 static struct in_device *inetdev_init(struct net_device *dev)
 {
@@ -159,7 +159,8 @@ static struct in_device *inetdev_init(struct net_device *dev)
 	       sizeof(in_dev->cnf));
 	in_dev->cnf.sysctl = NULL;
 	in_dev->dev = dev;
-	if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
+	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
+	if (!in_dev->arp_parms)
 		goto out_kfree;
 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
 		dev_disable_lro(dev);
@@ -405,13 +406,15 @@ struct in_device *inetdev_by_index(struct net *net, int ifindex)
 {
 	struct net_device *dev;
 	struct in_device *in_dev = NULL;
-	read_lock(&dev_base_lock);
-	dev = __dev_get_by_index(net, ifindex);
+
+	rcu_read_lock();
+	dev = dev_get_by_index_rcu(net, ifindex);
 	if (dev)
 		in_dev = in_dev_get(dev);
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 	return in_dev;
 }
+EXPORT_SYMBOL(inetdev_by_index);

 /* Called only from RTNL semaphored context. No locks. */

@@ -557,7 +560,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
  *	Determine a default network mask, based on the IP address.
  */

-static __inline__ int inet_abc_len(__be32 addr)
+static inline int inet_abc_len(__be32 addr)
 {
 	int rc = -1;	/* Something else, probably a multicast. */

@@ -646,13 +649,15 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 	rtnl_lock();

 	ret = -ENODEV;
-	if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
+	dev = __dev_get_by_name(net, ifr.ifr_name);
+	if (!dev)
 		goto done;

 	if (colon)
 		*colon = ':';

-	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
+	in_dev = __in_dev_get_rtnl(dev);
+	if (in_dev) {
 		if (tryaddrmatch) {
 			/* Matthias Andree */
 			/* compare label and address (4.4BSD style) */
@@ -720,7 +725,8 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 
 		if (!ifa) {
 			ret = -ENOBUFS;
-			if ((ifa = inet_alloc_ifa()) == NULL)
+			ifa = inet_alloc_ifa();
+			if (!ifa)
 				break;
 			if (colon)
 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
@@ -822,10 +828,10 @@ static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
 	struct ifreq ifr;
 	int done = 0;

-	if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
+	if (!in_dev)
 		goto out;

-	for (; ifa; ifa = ifa->ifa_next) {
+	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
 		if (!buf) {
 			done += sizeof(ifr);
 			continue;
@@ -875,36 +881,33 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
 		if (!addr)
 			addr = ifa->ifa_local;
 	} endfor_ifa(in_dev);
-no_in_dev:
-	rcu_read_unlock();

 	if (addr)
-		goto out;
+		goto out_unlock;
+no_in_dev:

 	/* Not loopback addresses on loopback should be preferred
 	   in this case. It is importnat that lo is the first interface
 	   in dev_base list.
 	 */
-	read_lock(&dev_base_lock);
-	rcu_read_lock();
-	for_each_netdev(net, dev) {
-		if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
+	for_each_netdev_rcu(net, dev) {
+		in_dev = __in_dev_get_rcu(dev);
+		if (!in_dev)
 			continue;

 		for_primary_ifa(in_dev) {
 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
 			    ifa->ifa_scope <= scope) {
 				addr = ifa->ifa_local;
-				goto out_unlock_both;
+				goto out_unlock;
 			}
 		} endfor_ifa(in_dev);
 	}
-out_unlock_both:
-	read_unlock(&dev_base_lock);
+out_unlock:
 	rcu_read_unlock();
-out:
 	return addr;
 }
+EXPORT_SYMBOL(inet_select_addr);

 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
 				 __be32 local, int scope)
@@ -940,7 +943,7 @@ static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
 		}
 	} endfor_ifa(in_dev);

-	return same? addr : 0;
+	return same ? addr : 0;
 }

 /*
@@ -961,17 +964,16 @@ __be32 inet_confirm_addr(struct in_device *in_dev,
 		return confirm_addr_indev(in_dev, dst, local, scope);

 	net = dev_net(in_dev->dev);
-	read_lock(&dev_base_lock);
 	rcu_read_lock();
-	for_each_netdev(net, dev) {
-		if ((in_dev = __in_dev_get_rcu(dev))) {
+	for_each_netdev_rcu(net, dev) {
+		in_dev = __in_dev_get_rcu(dev);
+		if (in_dev) {
 			addr = confirm_addr_indev(in_dev, dst, local, scope);
 			if (addr)
 				break;
 		}
 	}
 	rcu_read_unlock();
-	read_unlock(&dev_base_lock);

 	return addr;
 }
@@ -984,14 +986,16 @@ int register_inetaddr_notifier(struct notifier_block *nb)
 {
 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
 }
+EXPORT_SYMBOL(register_inetaddr_notifier);

 int unregister_inetaddr_notifier(struct notifier_block *nb)
 {
 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
 }
+EXPORT_SYMBOL(unregister_inetaddr_notifier);

-/* Rename ifa_labels for a device name change. Make some effort to preserve existing
- * alias numbering and to create unique labels if possible.
+/* Rename ifa_labels for a device name change. Make some effort to preserve
+ * existing alias numbering and to create unique labels if possible.
 */
 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
 {
@@ -1010,11 +1014,10 @@ static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
 			sprintf(old, ":%d", named);
 			dot = old;
 		}
-		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
+		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
 			strcat(ifa->ifa_label, dot);
-		} else {
+		else
 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
-		}
 skip:
 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
 	}
@@ -1061,8 +1064,9 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 		if (!inetdev_valid_mtu(dev->mtu))
 			break;
 		if (dev->flags & IFF_LOOPBACK) {
-			struct in_ifaddr *ifa;
-			if ((ifa = inet_alloc_ifa()) != NULL) {
+			struct in_ifaddr *ifa = inet_alloc_ifa();
+
+			if (ifa) {
 				ifa->ifa_local =
 					ifa->ifa_address = htonl(INADDR_LOOPBACK);
 				ifa->ifa_prefixlen = 8;
@@ -1183,7 +1187,8 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 			goto cont;
 		if (idx > s_idx)
 			s_ip_idx = 0;
-		if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
+		in_dev = __in_dev_get_rtnl(dev);
+		if (!in_dev)
 			goto cont;

 		for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
@@ -1239,18 +1244,18 @@ static void devinet_copy_dflt_conf(struct net *net, int i)
 {
 	struct net_device *dev;

-	read_lock(&dev_base_lock);
-	for_each_netdev(net, dev) {
+	rcu_read_lock();
+	for_each_netdev_rcu(net, dev) {
 		struct in_device *in_dev;
-		rcu_read_lock();
+
 		in_dev = __in_dev_get_rcu(dev);
 		if (in_dev && !test_bit(i, in_dev->cnf.state))
 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
-		rcu_read_unlock();
 	}
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 }

+/* called with RTNL locked */
 static void inet_forward_change(struct net *net)
 {
 	struct net_device *dev;
@@ -1259,7 +1264,6 @@ static void inet_forward_change(struct net *net)
 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;

-	read_lock(&dev_base_lock);
 	for_each_netdev(net, dev) {
 		struct in_device *in_dev;
 		if (on)
@@ -1270,7 +1274,6 @@ static void inet_forward_change(struct net *net)
 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
 		rcu_read_unlock();
 	}
-	read_unlock(&dev_base_lock);
 }

 static int devinet_conf_proc(ctl_table *ctl, int write,
@@ -1680,8 +1683,3 @@ void __init devinet_init(void)
 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
 }

-EXPORT_SYMBOL(in_dev_finish_destroy);
-EXPORT_SYMBOL(inet_select_addr);
-EXPORT_SYMBOL(inetdev_by_index);
-EXPORT_SYMBOL(register_inetaddr_notifier);
-EXPORT_SYMBOL(unregister_inetaddr_notifier);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index aa00398be80e..816e2180bd60 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -125,7 +125,7 @@ void fib_select_default(struct net *net,
 #endif
 	tb = fib_get_table(net, table);
 	if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
-		tb->tb_select_default(tb, flp, res);
+		fib_table_select_default(tb, flp, res);
 }

 static void fib_flush(struct net *net)
@@ -139,7 +139,7 @@ static void fib_flush(struct net *net)
 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
 		head = &net->ipv4.fib_table_hash[h];
 		hlist_for_each_entry(tb, node, head, tb_hlist)
-			flushed += tb->tb_flush(tb);
+			flushed += fib_table_flush(tb);
 	}

 	if (flushed)
@@ -162,7 +162,7 @@ struct net_device * ip_dev_find(struct net *net, __be32 addr)
 #endif

 	local_table = fib_get_table(net, RT_TABLE_LOCAL);
-	if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
+	if (!local_table || fib_table_lookup(local_table, &fl, &res))
 		return NULL;
 	if (res.type != RTN_LOCAL)
 		goto out;
@@ -200,7 +200,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net,
 	local_table = fib_get_table(net, RT_TABLE_LOCAL);
 	if (local_table) {
 		ret = RTN_UNICAST;
-		if (!local_table->tb_lookup(local_table, &fl, &res)) {
+		if (!fib_table_lookup(local_table, &fl, &res)) {
 			if (!dev || dev == res.fi->fib_dev)
 				ret = res.type;
 			fib_res_put(&res);
@@ -476,13 +476,13 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 		if (cmd == SIOCDELRT) {
 			tb = fib_get_table(net, cfg.fc_table);
 			if (tb)
-				err = tb->tb_delete(tb, &cfg);
+				err = fib_table_delete(tb, &cfg);
 			else
 				err = -ESRCH;
 		} else {
 			tb = fib_new_table(net, cfg.fc_table);
 			if (tb)
-				err = tb->tb_insert(tb, &cfg);
+				err = fib_table_insert(tb, &cfg);
 			else
 				err = -ENOBUFS;
 		}
@@ -597,7 +597,7 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *ar
 		goto errout;
 	}

-	err = tb->tb_delete(tb, &cfg);
+	err = fib_table_delete(tb, &cfg);
 errout:
 	return err;
 }
@@ -619,7 +619,7 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *ar
 		goto errout;
 	}

-	err = tb->tb_insert(tb, &cfg);
+	err = fib_table_insert(tb, &cfg);
 errout:
 	return err;
 }
@@ -650,7 +650,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 		if (dumped)
 			memset(&cb->args[2], 0, sizeof(cb->args) -
 				 2 * sizeof(cb->args[0]));
-		if (tb->tb_dump(tb, skb, cb) < 0)
+		if (fib_table_dump(tb, skb, cb) < 0)
 			goto out;
 		dumped = 1;
 next:
@@ -704,9 +704,9 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad
 	cfg.fc_scope = RT_SCOPE_HOST;

 	if (cmd == RTM_NEWROUTE)
-		tb->tb_insert(tb, &cfg);
+		fib_table_insert(tb, &cfg);
 	else
-		tb->tb_delete(tb, &cfg);
+		fib_table_delete(tb, &cfg);
 }

 void fib_add_ifaddr(struct in_ifaddr *ifa)
@@ -835,7 +835,7 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
 	local_bh_disable();

 	frn->tb_id = tb->tb_id;
-	frn->err = tb->tb_lookup(tb, &fl, &res);
+	frn->err = fib_table_lookup(tb, &fl, &res);

 	if (!frn->err) {
 		frn->prefixlen = res.prefixlen;
@@ -1012,7 +1012,7 @@ static void __net_exit ip_fib_net_exit(struct net *net)
 		head = &net->ipv4.fib_table_hash[i];
 		hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
 			hlist_del(node);
-			tb->tb_flush(tb);
+			fib_table_flush(tb);
 			kfree(tb);
 		}
 	}
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index ecd39454235c..14972017b9c2 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -242,8 +242,8 @@ fn_new_zone(struct fn_hash *table, int z)
 	return fz;
 }

-static int
-fn_hash_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
+int fib_table_lookup(struct fib_table *tb,
+		     const struct flowi *flp, struct fib_result *res)
 {
 	int err;
 	struct fn_zone *fz;
@@ -274,8 +274,8 @@ out:
 	return err;
 }

-static void
-fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
+void fib_table_select_default(struct fib_table *tb,
+			      const struct flowi *flp, struct fib_result *res)
 {
 	int order, last_idx;
 	struct hlist_node *node;
@@ -366,7 +366,7 @@ static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key)
 	return NULL;
 }

-static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
+int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 {
 	struct fn_hash *table = (struct fn_hash *) tb->tb_data;
 	struct fib_node *new_f = NULL;
@@ -544,8 +544,7 @@ out:
 	return err;
 }

-
-static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg)
+int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
 {
 	struct fn_hash *table = (struct fn_hash *)tb->tb_data;
 	struct fib_node *f;
@@ -662,7 +661,7 @@ static int fn_flush_list(struct fn_zone *fz, int idx)
 	return found;
 }

-static int fn_hash_flush(struct fib_table *tb)
+int fib_table_flush(struct fib_table *tb)
 {
 	struct fn_hash *table = (struct fn_hash *) tb->tb_data;
 	struct fn_zone *fz;
@@ -743,7 +742,8 @@ fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
 	return skb->len;
 }

-static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
+int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
+		   struct netlink_callback *cb)
 {
 	int m, s_m;
 	struct fn_zone *fz;
@@ -787,12 +787,7 @@ struct fib_table *fib_hash_table(u32 id)
 
 	tb->tb_id = id;
 	tb->tb_default = -1;
-	tb->tb_lookup = fn_hash_lookup;
-	tb->tb_insert = fn_hash_insert;
-	tb->tb_delete = fn_hash_delete;
-	tb->tb_flush = fn_hash_flush;
-	tb->tb_select_default = fn_hash_select_default;
-	tb->tb_dump = fn_hash_dump;
+
 	memset(tb->tb_data, 0, sizeof(struct fn_hash));
 	return tb;
 }
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 92d9d97ec5e3..835262c2b867 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -94,7 +94,7 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
 	if ((tbl = fib_get_table(rule->fr_net, rule->table)) == NULL)
 		goto errout;

-	err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result);
+	err = fib_table_lookup(tbl, flp, (struct fib_result *) arg->result);
 	if (err > 0)
 		err = -EAGAIN;
 errout:
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 291bdf50a21f..af5d89792860 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1174,7 +1174,7 @@ done:
 /*
  * Caller must hold RTNL.
  */
-static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
+int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 {
 	struct trie *t = (struct trie *) tb->tb_data;
 	struct fib_alias *fa, *new_fa;
@@ -1373,8 +1373,8 @@ static int check_leaf(struct trie *t, struct leaf *l,
 	return 1;
 }

-static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp,
-			  struct fib_result *res)
+int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
+		     struct fib_result *res)
 {
 	struct trie *t = (struct trie *) tb->tb_data;
 	int ret;
@@ -1595,7 +1595,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l)
 /*
  * Caller must hold RTNL.
  */
-static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
+int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
 {
 	struct trie *t = (struct trie *) tb->tb_data;
 	u32 key, mask;
@@ -1786,7 +1786,7 @@ static struct leaf *trie_leafindex(struct trie *t, int index)
 /*
  * Caller must hold RTNL.
  */
-static int fn_trie_flush(struct fib_table *tb)
+int fib_table_flush(struct fib_table *tb)
 {
 	struct trie *t = (struct trie *) tb->tb_data;
 	struct leaf *l, *ll = NULL;
@@ -1807,9 +1807,9 @@ static int fn_trie_flush(struct fib_table *tb)
 	return found;
 }

-static void fn_trie_select_default(struct fib_table *tb,
-				   const struct flowi *flp,
-				   struct fib_result *res)
+void fib_table_select_default(struct fib_table *tb,
+			      const struct flowi *flp,
+			      struct fib_result *res)
 {
 	struct trie *t = (struct trie *) tb->tb_data;
 	int order, last_idx;
@@ -1952,8 +1952,8 @@ static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb,
 	return skb->len;
 }

-static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb,
-			struct netlink_callback *cb)
+int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
+		   struct netlink_callback *cb)
 {
 	struct leaf *l;
 	struct trie *t = (struct trie *) tb->tb_data;
@@ -2020,12 +2020,6 @@ struct fib_table *fib_hash_table(u32 id)
 
 	tb->tb_id = id;
 	tb->tb_default = -1;
-	tb->tb_lookup = fn_trie_lookup;
-	tb->tb_insert = fn_trie_insert;
-	tb->tb_delete = fn_trie_delete;
-	tb->tb_flush = fn_trie_flush;
-	tb->tb_select_default = fn_trie_select_default;
-	tb->tb_dump = fn_trie_dump;

 	t = (struct trie *) tb->tb_data;
 	memset(t, 0, sizeof(*t));
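
Note: the fib_frontend.c, fib_hash.c, and fib_trie.c hunks above devirtualize the FIB table operations — the six tb_* function pointers are dropped from struct fib_table and callers invoke fib_table_lookup() and friends directly, with exactly one backend providing those symbols at link time. A small userspace C sketch of the pattern, with hypothetical types standing in for the kernel's:

#include <stdio.h>

struct fib_table { int id; /* backend-private data would follow */ };

/*
 * Exactly one backend is compiled in, mirroring the kernel's
 * CONFIG_IP_FIB_HASH / CONFIG_IP_FIB_TRIE choice; both define the
 * same entry points (fib_table_lookup, fib_table_insert, ...).
 */
#ifdef USE_TRIE_BACKEND
static const char *backend = "trie";
#else
static const char *backend = "hash";
#endif

int fib_table_lookup(struct fib_table *tb, int key)
{
	printf("%s lookup of key %d in table %d\n", backend, key, tb->id);
	return 0;
}

int main(void)
{
	struct fib_table tb = { .id = 254 };	/* RT_TABLE_MAIN is 254 */

	/* Callers previously went through tb->tb_lookup(tb, ...); the
	 * direct call drops six pointers per table and lets the
	 * compiler see (and potentially inline) the target. */
	return fib_table_lookup(&tb, 42);
}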
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 5bc13fe816d1..fe11f60ce41b 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -501,15 +501,16 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	if (!(rt->rt_flags & RTCF_LOCAL)) {
 		struct net_device *dev = NULL;

+		rcu_read_lock();
 		if (rt->fl.iif &&
 		    net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)
-			dev = dev_get_by_index(net, rt->fl.iif);
+			dev = dev_get_by_index_rcu(net, rt->fl.iif);

-		if (dev) {
+		if (dev)
 			saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK);
-			dev_put(dev);
-		} else
+		else
 			saddr = 0;
+		rcu_read_unlock();
 	}

 	tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) |
@@ -1165,6 +1166,10 @@ static int __net_init icmp_sk_init(struct net *net)
 		sk->sk_sndbuf =
 			(2 * ((64 * 1024) + sizeof(struct sk_buff)));

+		/*
+		 * Speedup sock_wfree()
+		 */
+		sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
 		inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT;
 	}

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 537731b3bcb3..26fb50e91311 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -358,6 +358,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
358 const struct inet_request_sock *ireq = inet_rsk(req); 358 const struct inet_request_sock *ireq = inet_rsk(req);
359 struct ip_options *opt = inet_rsk(req)->opt; 359 struct ip_options *opt = inet_rsk(req)->opt;
360 struct flowi fl = { .oif = sk->sk_bound_dev_if, 360 struct flowi fl = { .oif = sk->sk_bound_dev_if,
361 .mark = sk->sk_mark,
361 .nl_u = { .ip4_u = 362 .nl_u = { .ip4_u =
362 { .daddr = ((opt && opt->srr) ? 363 { .daddr = ((opt && opt->srr) ?
363 opt->faddr : 364 opt->faddr :
@@ -367,7 +368,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
367 .proto = sk->sk_protocol, 368 .proto = sk->sk_protocol,
368 .flags = inet_sk_flowi_flags(sk), 369 .flags = inet_sk_flowi_flags(sk),
369 .uli_u = { .ports = 370 .uli_u = { .ports =
370 { .sport = inet_sk(sk)->sport, 371 { .sport = inet_sk(sk)->inet_sport,
371 .dport = ireq->rmt_port } } }; 372 .dport = ireq->rmt_port } } };
372 struct net *net = sock_net(sk); 373 struct net *net = sock_net(sk);
373 374
@@ -574,9 +575,9 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
574 newsk->sk_state = TCP_SYN_RECV; 575 newsk->sk_state = TCP_SYN_RECV;
575 newicsk->icsk_bind_hash = NULL; 576 newicsk->icsk_bind_hash = NULL;
576 577
577 inet_sk(newsk)->dport = inet_rsk(req)->rmt_port; 578 inet_sk(newsk)->inet_dport = inet_rsk(req)->rmt_port;
578 inet_sk(newsk)->num = ntohs(inet_rsk(req)->loc_port); 579 inet_sk(newsk)->inet_num = ntohs(inet_rsk(req)->loc_port);
579 inet_sk(newsk)->sport = inet_rsk(req)->loc_port; 580 inet_sk(newsk)->inet_sport = inet_rsk(req)->loc_port;
580 newsk->sk_write_space = sk_stream_write_space; 581 newsk->sk_write_space = sk_stream_write_space;
581 582
582 newicsk->icsk_retransmits = 0; 583 newicsk->icsk_retransmits = 0;
@@ -607,8 +608,8 @@ void inet_csk_destroy_sock(struct sock *sk)
607 /* It cannot be in hash table! */ 608 /* It cannot be in hash table! */
608 WARN_ON(!sk_unhashed(sk)); 609 WARN_ON(!sk_unhashed(sk));
609 610
610 /* If it has not 0 inet_sk(sk)->num, it must be bound */ 611 /* If it has not 0 inet_sk(sk)->inet_num, it must be bound */
611 WARN_ON(inet_sk(sk)->num && !inet_csk(sk)->icsk_bind_hash); 612 WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash);
612 613
613 sk->sk_prot->destroy(sk); 614 sk->sk_prot->destroy(sk);
614 615
@@ -643,8 +644,8 @@ int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
643 * after validation is complete. 644 * after validation is complete.
644 */ 645 */
645 sk->sk_state = TCP_LISTEN; 646 sk->sk_state = TCP_LISTEN;
646 if (!sk->sk_prot->get_port(sk, inet->num)) { 647 if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
647 inet->sport = htons(inet->num); 648 inet->inet_sport = htons(inet->inet_num);
648 649
649 sk_dst_reset(sk); 650 sk_dst_reset(sk);
650 sk->sk_prot->hash(sk); 651 sk->sk_prot->hash(sk);
@@ -720,8 +721,8 @@ void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
720 const struct inet_sock *inet = inet_sk(sk); 721 const struct inet_sock *inet = inet_sk(sk);
721 722
722 sin->sin_family = AF_INET; 723 sin->sin_family = AF_INET;
723 sin->sin_addr.s_addr = inet->daddr; 724 sin->sin_addr.s_addr = inet->inet_daddr;
724 sin->sin_port = inet->dport; 725 sin->sin_port = inet->inet_dport;
725} 726}
726 727
727EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); 728EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index a706a47f4dbb..bdb78dd180ce 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -116,10 +116,10 @@ static int inet_csk_diag_fill(struct sock *sk,
116 r->id.idiag_cookie[0] = (u32)(unsigned long)sk; 116 r->id.idiag_cookie[0] = (u32)(unsigned long)sk;
117 r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); 117 r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);
118 118
119 r->id.idiag_sport = inet->sport; 119 r->id.idiag_sport = inet->inet_sport;
120 r->id.idiag_dport = inet->dport; 120 r->id.idiag_dport = inet->inet_dport;
121 r->id.idiag_src[0] = inet->rcv_saddr; 121 r->id.idiag_src[0] = inet->inet_rcv_saddr;
122 r->id.idiag_dst[0] = inet->daddr; 122 r->id.idiag_dst[0] = inet->inet_daddr;
123 123
124#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 124#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
125 if (r->idiag_family == AF_INET6) { 125 if (r->idiag_family == AF_INET6) {
@@ -504,11 +504,11 @@ static int inet_csk_diag_dump(struct sock *sk,
504 } else 504 } else
505#endif 505#endif
506 { 506 {
507 entry.saddr = &inet->rcv_saddr; 507 entry.saddr = &inet->inet_rcv_saddr;
508 entry.daddr = &inet->daddr; 508 entry.daddr = &inet->inet_daddr;
509 } 509 }
510 entry.sport = inet->num; 510 entry.sport = inet->inet_num;
511 entry.dport = ntohs(inet->dport); 511 entry.dport = ntohs(inet->inet_dport);
512 entry.userlocks = sk->sk_userlocks; 512 entry.userlocks = sk->sk_userlocks;
513 513
514 if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry)) 514 if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry))
@@ -584,7 +584,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
584 if (tmo < 0) 584 if (tmo < 0)
585 tmo = 0; 585 tmo = 0;
586 586
587 r->id.idiag_sport = inet->sport; 587 r->id.idiag_sport = inet->inet_sport;
588 r->id.idiag_dport = ireq->rmt_port; 588 r->id.idiag_dport = ireq->rmt_port;
589 r->id.idiag_src[0] = ireq->loc_addr; 589 r->id.idiag_src[0] = ireq->loc_addr;
590 r->id.idiag_dst[0] = ireq->rmt_addr; 590 r->id.idiag_dst[0] = ireq->rmt_addr;
@@ -639,7 +639,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
639 639
640 if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { 640 if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
641 bc = (struct rtattr *)(r + 1); 641 bc = (struct rtattr *)(r + 1);
642 entry.sport = inet->num; 642 entry.sport = inet->inet_num;
643 entry.userlocks = sk->sk_userlocks; 643 entry.userlocks = sk->sk_userlocks;
644 } 644 }
645 645
@@ -732,7 +732,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
732 continue; 732 continue;
733 } 733 }
734 734
735 if (r->id.idiag_sport != inet->sport && 735 if (r->id.idiag_sport != inet->inet_sport &&
736 r->id.idiag_sport) 736 r->id.idiag_sport)
737 goto next_listen; 737 goto next_listen;
738 738
@@ -774,7 +774,7 @@ skip_listen_ht:
774 if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV))) 774 if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV)))
775 goto unlock; 775 goto unlock;
776 776
777 for (i = s_i; i < hashinfo->ehash_size; i++) { 777 for (i = s_i; i <= hashinfo->ehash_mask; i++) {
778 struct inet_ehash_bucket *head = &hashinfo->ehash[i]; 778 struct inet_ehash_bucket *head = &hashinfo->ehash[i];
779 spinlock_t *lock = inet_ehash_lockp(hashinfo, i); 779 spinlock_t *lock = inet_ehash_lockp(hashinfo, i);
780 struct sock *sk; 780 struct sock *sk;
@@ -797,10 +797,10 @@ skip_listen_ht:
797 goto next_normal; 797 goto next_normal;
798 if (!(r->idiag_states & (1 << sk->sk_state))) 798 if (!(r->idiag_states & (1 << sk->sk_state)))
799 goto next_normal; 799 goto next_normal;
800 if (r->id.idiag_sport != inet->sport && 800 if (r->id.idiag_sport != inet->inet_sport &&
801 r->id.idiag_sport) 801 r->id.idiag_sport)
802 goto next_normal; 802 goto next_normal;
803 if (r->id.idiag_dport != inet->dport && 803 if (r->id.idiag_dport != inet->inet_dport &&
804 r->id.idiag_dport) 804 r->id.idiag_dport)
805 goto next_normal; 805 goto next_normal;
806 if (inet_csk_diag_dump(sk, skb, cb) < 0) { 806 if (inet_csk_diag_dump(sk, skb, cb) < 0) {
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 625cc5f64c94..47ad7aab51e3 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -64,7 +64,7 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
64 64
65 atomic_inc(&hashinfo->bsockets); 65 atomic_inc(&hashinfo->bsockets);
66 66
67 inet_sk(sk)->num = snum; 67 inet_sk(sk)->inet_num = snum;
68 sk_add_bind_node(sk, &tb->owners); 68 sk_add_bind_node(sk, &tb->owners);
69 tb->num_owners++; 69 tb->num_owners++;
70 inet_csk(sk)->icsk_bind_hash = tb; 70 inet_csk(sk)->icsk_bind_hash = tb;
@@ -76,7 +76,7 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
76static void __inet_put_port(struct sock *sk) 76static void __inet_put_port(struct sock *sk)
77{ 77{
78 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; 78 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
79 const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->num, 79 const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num,
80 hashinfo->bhash_size); 80 hashinfo->bhash_size);
81 struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; 81 struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
82 struct inet_bind_bucket *tb; 82 struct inet_bind_bucket *tb;
@@ -88,7 +88,7 @@ static void __inet_put_port(struct sock *sk)
88 __sk_del_bind_node(sk); 88 __sk_del_bind_node(sk);
89 tb->num_owners--; 89 tb->num_owners--;
90 inet_csk(sk)->icsk_bind_hash = NULL; 90 inet_csk(sk)->icsk_bind_hash = NULL;
91 inet_sk(sk)->num = 0; 91 inet_sk(sk)->inet_num = 0;
92 inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); 92 inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
93 spin_unlock(&head->lock); 93 spin_unlock(&head->lock);
94} 94}
@@ -105,7 +105,7 @@ EXPORT_SYMBOL(inet_put_port);
105void __inet_inherit_port(struct sock *sk, struct sock *child) 105void __inet_inherit_port(struct sock *sk, struct sock *child)
106{ 106{
107 struct inet_hashinfo *table = sk->sk_prot->h.hashinfo; 107 struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
108 const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->num, 108 const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->inet_num,
109 table->bhash_size); 109 table->bhash_size);
110 struct inet_bind_hashbucket *head = &table->bhash[bhash]; 110 struct inet_bind_hashbucket *head = &table->bhash[bhash];
111 struct inet_bind_bucket *tb; 111 struct inet_bind_bucket *tb;
@@ -126,9 +126,9 @@ static inline int compute_score(struct sock *sk, struct net *net,
126 int score = -1; 126 int score = -1;
127 struct inet_sock *inet = inet_sk(sk); 127 struct inet_sock *inet = inet_sk(sk);
128 128
129 if (net_eq(sock_net(sk), net) && inet->num == hnum && 129 if (net_eq(sock_net(sk), net) && inet->inet_num == hnum &&
130 !ipv6_only_sock(sk)) { 130 !ipv6_only_sock(sk)) {
131 __be32 rcv_saddr = inet->rcv_saddr; 131 __be32 rcv_saddr = inet->inet_rcv_saddr;
132 score = sk->sk_family == PF_INET ? 1 : 0; 132 score = sk->sk_family == PF_INET ? 1 : 0;
133 if (rcv_saddr) { 133 if (rcv_saddr) {
134 if (rcv_saddr != daddr) 134 if (rcv_saddr != daddr)
@@ -209,7 +209,7 @@ struct sock * __inet_lookup_established(struct net *net,
209 * have wildcards anyways. 209 * have wildcards anyways.
210 */ 210 */
211 unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport); 211 unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport);
212 unsigned int slot = hash & (hashinfo->ehash_size - 1); 212 unsigned int slot = hash & hashinfo->ehash_mask;
213 struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; 213 struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
214 214
215 rcu_read_lock(); 215 rcu_read_lock();
@@ -273,13 +273,14 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
273{ 273{
274 struct inet_hashinfo *hinfo = death_row->hashinfo; 274 struct inet_hashinfo *hinfo = death_row->hashinfo;
275 struct inet_sock *inet = inet_sk(sk); 275 struct inet_sock *inet = inet_sk(sk);
276 __be32 daddr = inet->rcv_saddr; 276 __be32 daddr = inet->inet_rcv_saddr;
277 __be32 saddr = inet->daddr; 277 __be32 saddr = inet->inet_daddr;
278 int dif = sk->sk_bound_dev_if; 278 int dif = sk->sk_bound_dev_if;
279 INET_ADDR_COOKIE(acookie, saddr, daddr) 279 INET_ADDR_COOKIE(acookie, saddr, daddr)
280 const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); 280 const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
281 struct net *net = sock_net(sk); 281 struct net *net = sock_net(sk);
282 unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport); 282 unsigned int hash = inet_ehashfn(net, daddr, lport,
283 saddr, inet->inet_dport);
283 struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); 284 struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
284 spinlock_t *lock = inet_ehash_lockp(hinfo, hash); 285 spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
285 struct sock *sk2; 286 struct sock *sk2;
@@ -312,8 +313,8 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
312unique: 313unique:
313 /* Must record num and sport now. Otherwise we will see 314 /* Must record num and sport now. Otherwise we will see
314 * in hash table socket with a funny identity. */ 315 * in hash table socket with a funny identity. */
315 inet->num = lport; 316 inet->inet_num = lport;
316 inet->sport = htons(lport); 317 inet->inet_sport = htons(lport);
317 sk->sk_hash = hash; 318 sk->sk_hash = hash;
318 WARN_ON(!sk_unhashed(sk)); 319 WARN_ON(!sk_unhashed(sk));
319 __sk_nulls_add_node_rcu(sk, &head->chain); 320 __sk_nulls_add_node_rcu(sk, &head->chain);
@@ -341,8 +342,9 @@ not_unique:
341static inline u32 inet_sk_port_offset(const struct sock *sk) 342static inline u32 inet_sk_port_offset(const struct sock *sk)
342{ 343{
343 const struct inet_sock *inet = inet_sk(sk); 344 const struct inet_sock *inet = inet_sk(sk);
344 return secure_ipv4_port_ephemeral(inet->rcv_saddr, inet->daddr, 345 return secure_ipv4_port_ephemeral(inet->inet_rcv_saddr,
345 inet->dport); 346 inet->inet_daddr,
347 inet->inet_dport);
346} 348}
347 349
348void __inet_hash_nolisten(struct sock *sk) 350void __inet_hash_nolisten(struct sock *sk)
@@ -424,7 +426,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
424 void (*hash)(struct sock *sk)) 426 void (*hash)(struct sock *sk))
425{ 427{
426 struct inet_hashinfo *hinfo = death_row->hashinfo; 428 struct inet_hashinfo *hinfo = death_row->hashinfo;
427 const unsigned short snum = inet_sk(sk)->num; 429 const unsigned short snum = inet_sk(sk)->inet_num;
428 struct inet_bind_hashbucket *head; 430 struct inet_bind_hashbucket *head;
429 struct inet_bind_bucket *tb; 431 struct inet_bind_bucket *tb;
430 int ret; 432 int ret;
@@ -485,7 +487,7 @@ ok:
485 /* Head lock still held and bh's disabled */ 487 /* Head lock still held and bh's disabled */
486 inet_bind_hash(sk, tb, port); 488 inet_bind_hash(sk, tb, port);
487 if (sk_unhashed(sk)) { 489 if (sk_unhashed(sk)) {
488 inet_sk(sk)->sport = htons(port); 490 inet_sk(sk)->inet_sport = htons(port);
489 hash(sk); 491 hash(sk);
490 } 492 }
491 spin_unlock(&head->lock); 493 spin_unlock(&head->lock);
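
The bucket selection change here (hash & ehash_mask instead of hash & (ehash_size - 1)) and the loop-bound changes elsewhere in the series all rest on the table holding a power-of-two number of buckets, so the mask is simply size - 1. A standalone illustration in plain C (all names hypothetical):

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned int ehash_mask = (1u << 4) - 1;	/* 16 buckets -> mask 0xf */
	unsigned int hash = 0xdeadbeefu;

	/* Masking and modulo pick the same slot; masking avoids a division
	 * and spares every call site a "size - 1" computation. */
	assert((hash & ehash_mask) == (hash % (ehash_mask + 1)));

	/* With a mask, full-table walks run to the mask inclusively. */
	for (unsigned int i = 0; i <= ehash_mask; i++)
		printf("bucket %u\n", i);
	return 0;
}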
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 13f0781f35cd..1f5d508bb18b 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -86,7 +86,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
86 Note, that any socket with inet->num != 0 MUST be bound in 86 Note, that any socket with inet->num != 0 MUST be bound in
87 binding cache, even if it is closed. 87 binding cache, even if it is closed.
88 */ 88 */
89 bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->num, 89 bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
90 hashinfo->bhash_size)]; 90 hashinfo->bhash_size)];
91 spin_lock(&bhead->lock); 91 spin_lock(&bhead->lock);
92 tw->tw_tb = icsk->icsk_bind_hash; 92 tw->tw_tb = icsk->icsk_bind_hash;
@@ -124,14 +124,14 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
124 kmemcheck_annotate_bitfield(tw, flags); 124 kmemcheck_annotate_bitfield(tw, flags);
125 125
126 /* Give us an identity. */ 126 /* Give us an identity. */
127 tw->tw_daddr = inet->daddr; 127 tw->tw_daddr = inet->inet_daddr;
128 tw->tw_rcv_saddr = inet->rcv_saddr; 128 tw->tw_rcv_saddr = inet->inet_rcv_saddr;
129 tw->tw_bound_dev_if = sk->sk_bound_dev_if; 129 tw->tw_bound_dev_if = sk->sk_bound_dev_if;
130 tw->tw_num = inet->num; 130 tw->tw_num = inet->inet_num;
131 tw->tw_state = TCP_TIME_WAIT; 131 tw->tw_state = TCP_TIME_WAIT;
132 tw->tw_substate = state; 132 tw->tw_substate = state;
133 tw->tw_sport = inet->sport; 133 tw->tw_sport = inet->inet_sport;
134 tw->tw_dport = inet->dport; 134 tw->tw_dport = inet->inet_dport;
135 tw->tw_family = sk->sk_family; 135 tw->tw_family = sk->sk_family;
136 tw->tw_reuse = sk->sk_reuse; 136 tw->tw_reuse = sk->sk_reuse;
137 tw->tw_hash = sk->sk_hash; 137 tw->tw_hash = sk->sk_hash;
@@ -430,7 +430,7 @@ void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo,
430 int h; 430 int h;
431 431
432 local_bh_disable(); 432 local_bh_disable();
433 for (h = 0; h < (hashinfo->ehash_size); h++) { 433 for (h = 0; h <= hashinfo->ehash_mask; h++) {
434 struct inet_ehash_bucket *head = 434 struct inet_ehash_bucket *head =
435 inet_ehash_bucket(hashinfo, h); 435 inet_ehash_bucket(hashinfo, h);
436 spinlock_t *lock = inet_ehash_lockp(hashinfo, h); 436 spinlock_t *lock = inet_ehash_lockp(hashinfo, h);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 575f9bd51ccd..b007f8af6e1f 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -206,10 +206,11 @@ static void ip_expire(unsigned long arg)
206 struct sk_buff *head = qp->q.fragments; 206 struct sk_buff *head = qp->q.fragments;
207 207
208 /* Send an ICMP "Fragment Reassembly Timeout" message. */ 208 /* Send an ICMP "Fragment Reassembly Timeout" message. */
209 if ((head->dev = dev_get_by_index(net, qp->iif)) != NULL) { 209 rcu_read_lock();
210 head->dev = dev_get_by_index_rcu(net, qp->iif);
211 if (head->dev)
210 icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); 212 icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
211 dev_put(head->dev); 213 rcu_read_unlock();
212 }
213 } 214 }
214out: 215out:
215 spin_unlock(&qp->q.lock); 216 spin_unlock(&qp->q.lock);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 143333852624..a7de9e3a8f18 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -156,8 +156,13 @@ struct ipgre_net {
156#define tunnels_r tunnels[2] 156#define tunnels_r tunnels[2]
157#define tunnels_l tunnels[1] 157#define tunnels_l tunnels[1]
158#define tunnels_wc tunnels[0] 158#define tunnels_wc tunnels[0]
159/*
160 * Locking : hash tables are protected by RCU and a spinlock
161 */
162static DEFINE_SPINLOCK(ipgre_lock);
159 163
160static DEFINE_RWLOCK(ipgre_lock); 164#define for_each_ip_tunnel_rcu(start) \
165 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
161 166
162/* Given src, dst and key, find appropriate for input tunnel. */ 167/* Given src, dst and key, find appropriate for input tunnel. */
163 168
@@ -175,7 +180,7 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
175 ARPHRD_ETHER : ARPHRD_IPGRE; 180 ARPHRD_ETHER : ARPHRD_IPGRE;
176 int score, cand_score = 4; 181 int score, cand_score = 4;
177 182
178 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) { 183 for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
179 if (local != t->parms.iph.saddr || 184 if (local != t->parms.iph.saddr ||
180 remote != t->parms.iph.daddr || 185 remote != t->parms.iph.daddr ||
181 key != t->parms.i_key || 186 key != t->parms.i_key ||
@@ -200,7 +205,7 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
200 } 205 }
201 } 206 }
202 207
203 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) { 208 for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
204 if (remote != t->parms.iph.daddr || 209 if (remote != t->parms.iph.daddr ||
205 key != t->parms.i_key || 210 key != t->parms.i_key ||
206 !(t->dev->flags & IFF_UP)) 211 !(t->dev->flags & IFF_UP))
@@ -224,7 +229,7 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
224 } 229 }
225 } 230 }
226 231
227 for (t = ign->tunnels_l[h1]; t; t = t->next) { 232 for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
228 if ((local != t->parms.iph.saddr && 233 if ((local != t->parms.iph.saddr &&
229 (local != t->parms.iph.daddr || 234 (local != t->parms.iph.daddr ||
230 !ipv4_is_multicast(local))) || 235 !ipv4_is_multicast(local))) ||
@@ -250,7 +255,7 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
250 } 255 }
251 } 256 }
252 257
253 for (t = ign->tunnels_wc[h1]; t; t = t->next) { 258 for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
254 if (t->parms.i_key != key || 259 if (t->parms.i_key != key ||
255 !(t->dev->flags & IFF_UP)) 260 !(t->dev->flags & IFF_UP))
256 continue; 261 continue;
@@ -276,8 +281,9 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
276 if (cand != NULL) 281 if (cand != NULL)
277 return cand; 282 return cand;
278 283
279 if (ign->fb_tunnel_dev->flags & IFF_UP) 284 dev = ign->fb_tunnel_dev;
280 return netdev_priv(ign->fb_tunnel_dev); 285 if (dev->flags & IFF_UP)
286 return netdev_priv(dev);
281 287
282 return NULL; 288 return NULL;
283} 289}
@@ -311,10 +317,10 @@ static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
311{ 317{
312 struct ip_tunnel **tp = ipgre_bucket(ign, t); 318 struct ip_tunnel **tp = ipgre_bucket(ign, t);
313 319
320 spin_lock_bh(&ipgre_lock);
314 t->next = *tp; 321 t->next = *tp;
315 write_lock_bh(&ipgre_lock); 322 rcu_assign_pointer(*tp, t);
316 *tp = t; 323 spin_unlock_bh(&ipgre_lock);
317 write_unlock_bh(&ipgre_lock);
318} 324}
319 325
320static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) 326static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
@@ -323,9 +329,9 @@ static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
323 329
324 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) { 330 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
325 if (t == *tp) { 331 if (t == *tp) {
326 write_lock_bh(&ipgre_lock); 332 spin_lock_bh(&ipgre_lock);
327 *tp = t->next; 333 *tp = t->next;
328 write_unlock_bh(&ipgre_lock); 334 spin_unlock_bh(&ipgre_lock);
329 break; 335 break;
330 } 336 }
331 } 337 }
@@ -476,7 +482,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
476 break; 482 break;
477 } 483 }
478 484
479 read_lock(&ipgre_lock); 485 rcu_read_lock();
480 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr, 486 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
481 flags & GRE_KEY ? 487 flags & GRE_KEY ?
482 *(((__be32 *)p) + (grehlen / 4) - 1) : 0, 488 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
@@ -494,7 +500,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
494 t->err_count = 1; 500 t->err_count = 1;
495 t->err_time = jiffies; 501 t->err_time = jiffies;
496out: 502out:
497 read_unlock(&ipgre_lock); 503 rcu_read_unlock();
498 return; 504 return;
499} 505}
500 506
@@ -573,7 +579,7 @@ static int ipgre_rcv(struct sk_buff *skb)
573 579
574 gre_proto = *(__be16 *)(h + 2); 580 gre_proto = *(__be16 *)(h + 2);
575 581
576 read_lock(&ipgre_lock); 582 rcu_read_lock();
577 if ((tunnel = ipgre_tunnel_lookup(skb->dev, 583 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
578 iph->saddr, iph->daddr, key, 584 iph->saddr, iph->daddr, key,
579 gre_proto))) { 585 gre_proto))) {
@@ -647,13 +653,13 @@ static int ipgre_rcv(struct sk_buff *skb)
647 ipgre_ecn_decapsulate(iph, skb); 653 ipgre_ecn_decapsulate(iph, skb);
648 654
649 netif_rx(skb); 655 netif_rx(skb);
650 read_unlock(&ipgre_lock); 656 rcu_read_unlock();
651 return(0); 657 return(0);
652 } 658 }
653 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 659 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
654 660
655drop: 661drop:
656 read_unlock(&ipgre_lock); 662 rcu_read_unlock();
657drop_nolock: 663drop_nolock:
658 kfree_skb(skb); 664 kfree_skb(skb);
659 return(0); 665 return(0);
@@ -662,7 +668,8 @@ drop_nolock:
662static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 668static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
663{ 669{
664 struct ip_tunnel *tunnel = netdev_priv(dev); 670 struct ip_tunnel *tunnel = netdev_priv(dev);
665 struct net_device_stats *stats = &tunnel->dev->stats; 671 struct net_device_stats *stats = &dev->stats;
672 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
666 struct iphdr *old_iph = ip_hdr(skb); 673 struct iphdr *old_iph = ip_hdr(skb);
667 struct iphdr *tiph; 674 struct iphdr *tiph;
668 u8 tos; 675 u8 tos;
@@ -810,7 +817,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
810 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); 817 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
811 if (!new_skb) { 818 if (!new_skb) {
812 ip_rt_put(rt); 819 ip_rt_put(rt);
813 stats->tx_dropped++; 820 txq->tx_dropped++;
814 dev_kfree_skb(skb); 821 dev_kfree_skb(skb);
815 return NETDEV_TX_OK; 822 return NETDEV_TX_OK;
816 } 823 }
@@ -1283,16 +1290,19 @@ static const struct net_protocol ipgre_protocol = {
1283 .netns_ok = 1, 1290 .netns_ok = 1,
1284}; 1291};
1285 1292
1286static void ipgre_destroy_tunnels(struct ipgre_net *ign) 1293static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
1287{ 1294{
1288 int prio; 1295 int prio;
1289 1296
1290 for (prio = 0; prio < 4; prio++) { 1297 for (prio = 0; prio < 4; prio++) {
1291 int h; 1298 int h;
1292 for (h = 0; h < HASH_SIZE; h++) { 1299 for (h = 0; h < HASH_SIZE; h++) {
1293 struct ip_tunnel *t; 1300 struct ip_tunnel *t = ign->tunnels[prio][h];
1294 while ((t = ign->tunnels[prio][h]) != NULL) 1301
1295 unregister_netdevice(t->dev); 1302 while (t != NULL) {
1303 unregister_netdevice_queue(t->dev, head);
1304 t = t->next;
1305 }
1296 } 1306 }
1297 } 1307 }
1298} 1308}
@@ -1340,10 +1350,12 @@ err_alloc:
1340static void ipgre_exit_net(struct net *net) 1350static void ipgre_exit_net(struct net *net)
1341{ 1351{
1342 struct ipgre_net *ign; 1352 struct ipgre_net *ign;
1353 LIST_HEAD(list);
1343 1354
1344 ign = net_generic(net, ipgre_net_id); 1355 ign = net_generic(net, ipgre_net_id);
1345 rtnl_lock(); 1356 rtnl_lock();
1346 ipgre_destroy_tunnels(ign); 1357 ipgre_destroy_tunnels(ign, &list);
1358 unregister_netdevice_many(&list);
1347 rtnl_unlock(); 1359 rtnl_unlock();
1348 kfree(ign); 1360 kfree(ign);
1349} 1361}
@@ -1471,7 +1483,7 @@ static void ipgre_tap_setup(struct net_device *dev)
1471 dev->features |= NETIF_F_NETNS_LOCAL; 1483 dev->features |= NETIF_F_NETNS_LOCAL;
1472} 1484}
1473 1485
1474static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[], 1486static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
1475 struct nlattr *data[]) 1487 struct nlattr *data[])
1476{ 1488{
1477 struct ip_tunnel *nt; 1489 struct ip_tunnel *nt;
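
ip_gre.c (and ipip.c below) follow the standard RCU hash-chain recipe: readers traverse under rcu_read_lock() only, writers serialize on a spinlock and publish with rcu_assign_pointer() so a new node's fields are visible before the node itself. A condensed sketch of both sides, assuming the chain layout from the patch and a hypothetical tunnel_lock:

static DEFINE_SPINLOCK(tunnel_lock);

/* Writer: link a fully initialized tunnel at the head of its bucket. */
static void tunnel_link(struct ip_tunnel **bucket, struct ip_tunnel *t)
{
	spin_lock_bh(&tunnel_lock);
	t->next = *bucket;
	rcu_assign_pointer(*bucket, t);	/* publish after init is complete */
	spin_unlock_bh(&tunnel_lock);
}

/* Reader: lockless walk; caller must hold rcu_read_lock(). This is
 * what the for_each_ip_tunnel_rcu() macro above expands to. */
static struct ip_tunnel *tunnel_find(struct ip_tunnel **bucket, __be32 daddr)
{
	struct ip_tunnel *t;

	for (t = rcu_dereference(*bucket); t; t = rcu_dereference(t->next))
		if (t->parms.iph.daddr == daddr && (t->dev->flags & IFF_UP))
			return t;
	return NULL;
}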
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 6c98b43badf4..fdf51badc8e5 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -161,7 +161,7 @@ int ip_call_ra_chain(struct sk_buff *skb)
161 /* If socket is bound to an interface, only report 161 /* If socket is bound to an interface, only report
162 * the packet if it came from that interface. 162 * the packet if it came from that interface.
163 */ 163 */
164 if (sk && inet_sk(sk)->num == protocol && 164 if (sk && inet_sk(sk)->inet_num == protocol &&
165 (!sk->sk_bound_dev_if || 165 (!sk->sk_bound_dev_if ||
166 sk->sk_bound_dev_if == dev->ifindex) && 166 sk->sk_bound_dev_if == dev->ifindex) &&
167 sock_net(sk) == dev_net(dev)) { 167 sock_net(sk) == dev_net(dev)) {
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index f9895180f481..322b40864ac0 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -329,7 +329,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
329 __be32 daddr; 329 __be32 daddr;
330 330
331 /* Use correct destination address if we have options. */ 331 /* Use correct destination address if we have options. */
332 daddr = inet->daddr; 332 daddr = inet->inet_daddr;
333 if(opt && opt->srr) 333 if(opt && opt->srr)
334 daddr = opt->faddr; 334 daddr = opt->faddr;
335 335
@@ -338,13 +338,13 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
338 .mark = sk->sk_mark, 338 .mark = sk->sk_mark,
339 .nl_u = { .ip4_u = 339 .nl_u = { .ip4_u =
340 { .daddr = daddr, 340 { .daddr = daddr,
341 .saddr = inet->saddr, 341 .saddr = inet->inet_saddr,
342 .tos = RT_CONN_FLAGS(sk) } }, 342 .tos = RT_CONN_FLAGS(sk) } },
343 .proto = sk->sk_protocol, 343 .proto = sk->sk_protocol,
344 .flags = inet_sk_flowi_flags(sk), 344 .flags = inet_sk_flowi_flags(sk),
345 .uli_u = { .ports = 345 .uli_u = { .ports =
346 { .sport = inet->sport, 346 { .sport = inet->inet_sport,
347 .dport = inet->dport } } }; 347 .dport = inet->inet_dport } } };
348 348
349 /* If this fails, retransmit mechanism of transport layer will 349 /* If this fails, retransmit mechanism of transport layer will
350 * keep trying until route appears or the connection times 350 * keep trying until route appears or the connection times
@@ -379,7 +379,7 @@ packet_routed:
379 379
380 if (opt && opt->optlen) { 380 if (opt && opt->optlen) {
381 iph->ihl += opt->optlen >> 2; 381 iph->ihl += opt->optlen >> 2;
382 ip_options_build(skb, opt, inet->daddr, rt, 0); 382 ip_options_build(skb, opt, inet->inet_daddr, rt, 0);
383 } 383 }
384 384
385 ip_select_ident_more(iph, &rt->u.dst, sk, 385 ip_select_ident_more(iph, &rt->u.dst, sk,
@@ -846,7 +846,8 @@ int ip_append_data(struct sock *sk,
846 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; 846 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
847 847
848 if (inet->cork.length + length > 0xFFFF - fragheaderlen) { 848 if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
849 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu-exthdrlen); 849 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport,
850 mtu-exthdrlen);
850 return -EMSGSIZE; 851 return -EMSGSIZE;
851 } 852 }
852 853
@@ -1100,7 +1101,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
1100 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; 1101 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
1101 1102
1102 if (inet->cork.length + size > 0xFFFF - fragheaderlen) { 1103 if (inet->cork.length + size > 0xFFFF - fragheaderlen) {
1103 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu); 1104 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, mtu);
1104 return -EMSGSIZE; 1105 return -EMSGSIZE;
1105 } 1106 }
1106 1107
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index e982b5c1ee17..cafad9baff03 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -245,7 +245,7 @@ int ip_ra_control(struct sock *sk, unsigned char on,
245{ 245{
246 struct ip_ra_chain *ra, *new_ra, **rap; 246 struct ip_ra_chain *ra, *new_ra, **rap;
247 247
248 if (sk->sk_type != SOCK_RAW || inet_sk(sk)->num == IPPROTO_RAW) 248 if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
249 return -EINVAL; 249 return -EINVAL;
250 250
251 new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; 251 new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
@@ -480,7 +480,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
480 case IP_OPTIONS: 480 case IP_OPTIONS:
481 { 481 {
482 struct ip_options *opt = NULL; 482 struct ip_options *opt = NULL;
483 if (optlen > 40 || optlen < 0) 483 if (optlen > 40)
484 goto e_inval; 484 goto e_inval;
485 err = ip_options_get_from_user(sock_net(sk), &opt, 485 err = ip_options_get_from_user(sock_net(sk), &opt,
486 optval, optlen); 486 optval, optlen);
@@ -492,7 +492,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
492 if (sk->sk_family == PF_INET || 492 if (sk->sk_family == PF_INET ||
493 (!((1 << sk->sk_state) & 493 (!((1 << sk->sk_state) &
494 (TCPF_LISTEN | TCPF_CLOSE)) && 494 (TCPF_LISTEN | TCPF_CLOSE)) &&
495 inet->daddr != LOOPBACK4_IPV6)) { 495 inet->inet_daddr != LOOPBACK4_IPV6)) {
496#endif 496#endif
497 if (inet->opt) 497 if (inet->opt)
498 icsk->icsk_ext_hdr_len -= inet->opt->optlen; 498 icsk->icsk_ext_hdr_len -= inet->opt->optlen;
@@ -575,7 +575,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
575 inet->hdrincl = val ? 1 : 0; 575 inet->hdrincl = val ? 1 : 0;
576 break; 576 break;
577 case IP_MTU_DISCOVER: 577 case IP_MTU_DISCOVER:
578 if (val < 0 || val > 3) 578 if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE)
579 goto e_inval; 579 goto e_inval;
580 inet->pmtudisc = val; 580 inet->pmtudisc = val;
581 break; 581 break;
@@ -1180,8 +1180,8 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
1180 if (inet->cmsg_flags & IP_CMSG_PKTINFO) { 1180 if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
1181 struct in_pktinfo info; 1181 struct in_pktinfo info;
1182 1182
1183 info.ipi_addr.s_addr = inet->rcv_saddr; 1183 info.ipi_addr.s_addr = inet->inet_rcv_saddr;
1184 info.ipi_spec_dst.s_addr = inet->rcv_saddr; 1184 info.ipi_spec_dst.s_addr = inet->inet_rcv_saddr;
1185 info.ipi_ifindex = inet->mc_index; 1185 info.ipi_ifindex = inet->mc_index;
1186 put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); 1186 put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
1187 } 1187 }
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index ae40ed1ba560..c5b1f71c3cd8 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -134,7 +134,13 @@ static void ipip_fb_tunnel_init(struct net_device *dev);
134static void ipip_tunnel_init(struct net_device *dev); 134static void ipip_tunnel_init(struct net_device *dev);
135static void ipip_tunnel_setup(struct net_device *dev); 135static void ipip_tunnel_setup(struct net_device *dev);
136 136
137static DEFINE_RWLOCK(ipip_lock); 137/*
138 * Locking : hash tables are protected by RCU and a spinlock
139 */
140static DEFINE_SPINLOCK(ipip_lock);
141
142#define for_each_ip_tunnel_rcu(start) \
143 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
138 144
139static struct ip_tunnel * ipip_tunnel_lookup(struct net *net, 145static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
140 __be32 remote, __be32 local) 146 __be32 remote, __be32 local)
@@ -144,20 +150,21 @@ static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
144 struct ip_tunnel *t; 150 struct ip_tunnel *t;
145 struct ipip_net *ipn = net_generic(net, ipip_net_id); 151 struct ipip_net *ipn = net_generic(net, ipip_net_id);
146 152
147 for (t = ipn->tunnels_r_l[h0^h1]; t; t = t->next) { 153 for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
148 if (local == t->parms.iph.saddr && 154 if (local == t->parms.iph.saddr &&
149 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) 155 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
150 return t; 156 return t;
151 } 157
152 for (t = ipn->tunnels_r[h0]; t; t = t->next) { 158 for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
153 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) 159 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
154 return t; 160 return t;
155 } 161
156 for (t = ipn->tunnels_l[h1]; t; t = t->next) { 162 for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
157 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) 163 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
158 return t; 164 return t;
159 } 165
160 if ((t = ipn->tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP)) 166 t = rcu_dereference(ipn->tunnels_wc[0]);
167 if (t && (t->dev->flags&IFF_UP))
161 return t; 168 return t;
162 return NULL; 169 return NULL;
163} 170}
@@ -193,9 +200,9 @@ static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
193 200
194 for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) { 201 for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) {
195 if (t == *tp) { 202 if (t == *tp) {
196 write_lock_bh(&ipip_lock); 203 spin_lock_bh(&ipip_lock);
197 *tp = t->next; 204 *tp = t->next;
198 write_unlock_bh(&ipip_lock); 205 spin_unlock_bh(&ipip_lock);
199 break; 206 break;
200 } 207 }
201 } 208 }
@@ -205,10 +212,10 @@ static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
205{ 212{
206 struct ip_tunnel **tp = ipip_bucket(ipn, t); 213 struct ip_tunnel **tp = ipip_bucket(ipn, t);
207 214
215 spin_lock_bh(&ipip_lock);
208 t->next = *tp; 216 t->next = *tp;
209 write_lock_bh(&ipip_lock); 217 rcu_assign_pointer(*tp, t);
210 *tp = t; 218 spin_unlock_bh(&ipip_lock);
211 write_unlock_bh(&ipip_lock);
212} 219}
213 220
214static struct ip_tunnel * ipip_tunnel_locate(struct net *net, 221static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
@@ -267,9 +274,9 @@ static void ipip_tunnel_uninit(struct net_device *dev)
267 struct ipip_net *ipn = net_generic(net, ipip_net_id); 274 struct ipip_net *ipn = net_generic(net, ipip_net_id);
268 275
269 if (dev == ipn->fb_tunnel_dev) { 276 if (dev == ipn->fb_tunnel_dev) {
270 write_lock_bh(&ipip_lock); 277 spin_lock_bh(&ipip_lock);
271 ipn->tunnels_wc[0] = NULL; 278 ipn->tunnels_wc[0] = NULL;
272 write_unlock_bh(&ipip_lock); 279 spin_unlock_bh(&ipip_lock);
273 } else 280 } else
274 ipip_tunnel_unlink(ipn, netdev_priv(dev)); 281 ipip_tunnel_unlink(ipn, netdev_priv(dev));
275 dev_put(dev); 282 dev_put(dev);
@@ -318,7 +325,7 @@ static int ipip_err(struct sk_buff *skb, u32 info)
318 325
319 err = -ENOENT; 326 err = -ENOENT;
320 327
321 read_lock(&ipip_lock); 328 rcu_read_lock();
322 t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); 329 t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
323 if (t == NULL || t->parms.iph.daddr == 0) 330 if (t == NULL || t->parms.iph.daddr == 0)
324 goto out; 331 goto out;
@@ -333,7 +340,7 @@ static int ipip_err(struct sk_buff *skb, u32 info)
333 t->err_count = 1; 340 t->err_count = 1;
334 t->err_time = jiffies; 341 t->err_time = jiffies;
335out: 342out:
336 read_unlock(&ipip_lock); 343 rcu_read_unlock();
337 return err; 344 return err;
338} 345}
339 346
@@ -351,11 +358,11 @@ static int ipip_rcv(struct sk_buff *skb)
351 struct ip_tunnel *tunnel; 358 struct ip_tunnel *tunnel;
352 const struct iphdr *iph = ip_hdr(skb); 359 const struct iphdr *iph = ip_hdr(skb);
353 360
354 read_lock(&ipip_lock); 361 rcu_read_lock();
355 if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev), 362 if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev),
356 iph->saddr, iph->daddr)) != NULL) { 363 iph->saddr, iph->daddr)) != NULL) {
357 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 364 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
358 read_unlock(&ipip_lock); 365 rcu_read_unlock();
359 kfree_skb(skb); 366 kfree_skb(skb);
360 return 0; 367 return 0;
361 } 368 }
@@ -374,10 +381,10 @@ static int ipip_rcv(struct sk_buff *skb)
374 nf_reset(skb); 381 nf_reset(skb);
375 ipip_ecn_decapsulate(iph, skb); 382 ipip_ecn_decapsulate(iph, skb);
376 netif_rx(skb); 383 netif_rx(skb);
377 read_unlock(&ipip_lock); 384 rcu_read_unlock();
378 return 0; 385 return 0;
379 } 386 }
380 read_unlock(&ipip_lock); 387 rcu_read_unlock();
381 388
382 return -1; 389 return -1;
383} 390}
@@ -390,7 +397,8 @@ static int ipip_rcv(struct sk_buff *skb)
390static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 397static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
391{ 398{
392 struct ip_tunnel *tunnel = netdev_priv(dev); 399 struct ip_tunnel *tunnel = netdev_priv(dev);
393 struct net_device_stats *stats = &tunnel->dev->stats; 400 struct net_device_stats *stats = &dev->stats;
401 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
394 struct iphdr *tiph = &tunnel->parms.iph; 402 struct iphdr *tiph = &tunnel->parms.iph;
395 u8 tos = tunnel->parms.iph.tos; 403 u8 tos = tunnel->parms.iph.tos;
396 __be16 df = tiph->frag_off; 404 __be16 df = tiph->frag_off;
@@ -480,7 +488,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
480 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); 488 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
481 if (!new_skb) { 489 if (!new_skb) {
482 ip_rt_put(rt); 490 ip_rt_put(rt);
483 stats->tx_dropped++; 491 txq->tx_dropped++;
484 dev_kfree_skb(skb); 492 dev_kfree_skb(skb);
485 return NETDEV_TX_OK; 493 return NETDEV_TX_OK;
486 } 494 }
@@ -748,16 +756,19 @@ static struct xfrm_tunnel ipip_handler = {
748static const char banner[] __initconst = 756static const char banner[] __initconst =
749 KERN_INFO "IPv4 over IPv4 tunneling driver\n"; 757 KERN_INFO "IPv4 over IPv4 tunneling driver\n";
750 758
751static void ipip_destroy_tunnels(struct ipip_net *ipn) 759static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
752{ 760{
753 int prio; 761 int prio;
754 762
755 for (prio = 1; prio < 4; prio++) { 763 for (prio = 1; prio < 4; prio++) {
756 int h; 764 int h;
757 for (h = 0; h < HASH_SIZE; h++) { 765 for (h = 0; h < HASH_SIZE; h++) {
758 struct ip_tunnel *t; 766 struct ip_tunnel *t = ipn->tunnels[prio][h];
759 while ((t = ipn->tunnels[prio][h]) != NULL) 767
760 unregister_netdevice(t->dev); 768 while (t != NULL) {
769 unregister_netdevice_queue(t->dev, head);
770 t = t->next;
771 }
761 } 772 }
762 } 773 }
763} 774}
@@ -810,11 +821,13 @@ err_alloc:
810static void ipip_exit_net(struct net *net) 821static void ipip_exit_net(struct net *net)
811{ 822{
812 struct ipip_net *ipn; 823 struct ipip_net *ipn;
824 LIST_HEAD(list);
813 825
814 ipn = net_generic(net, ipip_net_id); 826 ipn = net_generic(net, ipip_net_id);
815 rtnl_lock(); 827 rtnl_lock();
816 ipip_destroy_tunnels(ipn); 828 ipip_destroy_tunnels(ipn, &list);
817 unregister_netdevice(ipn->fb_tunnel_dev); 829 unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
830 unregister_netdevice_many(&list);
818 rtnl_unlock(); 831 rtnl_unlock();
819 kfree(ipn); 832 kfree(ipn);
820} 833}
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 630a56df7b47..ef4ee45b928f 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -275,7 +275,8 @@ failure:
275 * @notify: Set to 1, if the caller is a notifier_call 275 * @notify: Set to 1, if the caller is a notifier_call
276 */ 276 */
277 277
278static int vif_delete(struct net *net, int vifi, int notify) 278static int vif_delete(struct net *net, int vifi, int notify,
279 struct list_head *head)
279{ 280{
280 struct vif_device *v; 281 struct vif_device *v;
281 struct net_device *dev; 282 struct net_device *dev;
@@ -319,7 +320,7 @@ static int vif_delete(struct net *net, int vifi, int notify)
319 } 320 }
320 321
321 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify) 322 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
322 unregister_netdevice(dev); 323 unregister_netdevice_queue(dev, head);
323 324
324 dev_put(dev); 325 dev_put(dev);
325 return 0; 326 return 0;
@@ -469,8 +470,18 @@ static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
469 return err; 470 return err;
470 } 471 }
471 break; 472 break;
473
474 case VIFF_USE_IFINDEX:
472 case 0: 475 case 0:
473 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr); 476 if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
477 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
478 if (dev && dev->ip_ptr == NULL) {
479 dev_put(dev);
480 return -EADDRNOTAVAIL;
481 }
482 } else
483 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
484
474 if (!dev) 485 if (!dev)
475 return -EADDRNOTAVAIL; 486 return -EADDRNOTAVAIL;
476 err = dev_set_allmulti(dev, 1); 487 err = dev_set_allmulti(dev, 1);
@@ -860,14 +871,16 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
860static void mroute_clean_tables(struct net *net) 871static void mroute_clean_tables(struct net *net)
861{ 872{
862 int i; 873 int i;
874 LIST_HEAD(list);
863 875
864 /* 876 /*
865 * Shut down all active vif entries 877 * Shut down all active vif entries
866 */ 878 */
867 for (i = 0; i < net->ipv4.maxvif; i++) { 879 for (i = 0; i < net->ipv4.maxvif; i++) {
868 if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC)) 880 if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
869 vif_delete(net, i, 0); 881 vif_delete(net, i, 0, &list);
870 } 882 }
883 unregister_netdevice_many(&list);
871 884
872 /* 885 /*
873 * Wipe the cache 886 * Wipe the cache
@@ -946,7 +959,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
946 switch (optname) { 959 switch (optname) {
947 case MRT_INIT: 960 case MRT_INIT:
948 if (sk->sk_type != SOCK_RAW || 961 if (sk->sk_type != SOCK_RAW ||
949 inet_sk(sk)->num != IPPROTO_IGMP) 962 inet_sk(sk)->inet_num != IPPROTO_IGMP)
950 return -EOPNOTSUPP; 963 return -EOPNOTSUPP;
951 if (optlen != sizeof(int)) 964 if (optlen != sizeof(int))
952 return -ENOPROTOOPT; 965 return -ENOPROTOOPT;
@@ -983,7 +996,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
983 if (optname == MRT_ADD_VIF) { 996 if (optname == MRT_ADD_VIF) {
984 ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk); 997 ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
985 } else { 998 } else {
986 ret = vif_delete(net, vif.vifc_vifi, 0); 999 ret = vif_delete(net, vif.vifc_vifi, 0, NULL);
987 } 1000 }
988 rtnl_unlock(); 1001 rtnl_unlock();
989 return ret; 1002 return ret;
@@ -1146,6 +1159,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
1146 struct net *net = dev_net(dev); 1159 struct net *net = dev_net(dev);
1147 struct vif_device *v; 1160 struct vif_device *v;
1148 int ct; 1161 int ct;
1162 LIST_HEAD(list);
1149 1163
1150 if (!net_eq(dev_net(dev), net)) 1164 if (!net_eq(dev_net(dev), net))
1151 return NOTIFY_DONE; 1165 return NOTIFY_DONE;
@@ -1155,8 +1169,9 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
1155 v = &net->ipv4.vif_table[0]; 1169 v = &net->ipv4.vif_table[0];
1156 for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) { 1170 for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
1157 if (v->dev == dev) 1171 if (v->dev == dev)
1158 vif_delete(net, ct, 1); 1172 vif_delete(net, ct, 1, &list);
1159 } 1173 }
1174 unregister_netdevice_many(&list);
1160 return NOTIFY_DONE; 1175 return NOTIFY_DONE;
1161} 1176}
1162 1177
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index aa95bb82ee6c..9cd423ffafa8 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -255,10 +255,10 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
255 struct nf_conntrack_tuple tuple; 255 struct nf_conntrack_tuple tuple;
256 256
257 memset(&tuple, 0, sizeof(tuple)); 257 memset(&tuple, 0, sizeof(tuple));
258 tuple.src.u3.ip = inet->rcv_saddr; 258 tuple.src.u3.ip = inet->inet_rcv_saddr;
259 tuple.src.u.tcp.port = inet->sport; 259 tuple.src.u.tcp.port = inet->inet_sport;
260 tuple.dst.u3.ip = inet->daddr; 260 tuple.dst.u3.ip = inet->inet_daddr;
261 tuple.dst.u.tcp.port = inet->dport; 261 tuple.dst.u.tcp.port = inet->inet_dport;
262 tuple.src.l3num = PF_INET; 262 tuple.src.l3num = PF_INET;
263 tuple.dst.protonum = sk->sk_protocol; 263 tuple.dst.protonum = sk->sk_protocol;
264 264
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index ab996f9c0fe0..ce154b47f1da 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -87,7 +87,7 @@ void raw_hash_sk(struct sock *sk)
87 struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; 87 struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
88 struct hlist_head *head; 88 struct hlist_head *head;
89 89
90 head = &h->ht[inet_sk(sk)->num & (RAW_HTABLE_SIZE - 1)]; 90 head = &h->ht[inet_sk(sk)->inet_num & (RAW_HTABLE_SIZE - 1)];
91 91
92 write_lock_bh(&h->lock); 92 write_lock_bh(&h->lock);
93 sk_add_node(sk, head); 93 sk_add_node(sk, head);
@@ -115,9 +115,9 @@ static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
115 sk_for_each_from(sk, node) { 115 sk_for_each_from(sk, node) {
116 struct inet_sock *inet = inet_sk(sk); 116 struct inet_sock *inet = inet_sk(sk);
117 117
118 if (net_eq(sock_net(sk), net) && inet->num == num && 118 if (net_eq(sock_net(sk), net) && inet->inet_num == num &&
119 !(inet->daddr && inet->daddr != raddr) && 119 !(inet->inet_daddr && inet->inet_daddr != raddr) &&
120 !(inet->rcv_saddr && inet->rcv_saddr != laddr) && 120 !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
121 !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) 121 !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
122 goto found; /* gotcha */ 122 goto found; /* gotcha */
123 } 123 }
@@ -292,7 +292,6 @@ static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb)
292 /* Charge it to the socket. */ 292 /* Charge it to the socket. */
293 293
294 if (sock_queue_rcv_skb(sk, skb) < 0) { 294 if (sock_queue_rcv_skb(sk, skb) < 0) {
295 atomic_inc(&sk->sk_drops);
296 kfree_skb(skb); 295 kfree_skb(skb);
297 return NET_RX_DROP; 296 return NET_RX_DROP;
298 } 297 }
@@ -327,7 +326,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
327 int err; 326 int err;
328 327
329 if (length > rt->u.dst.dev->mtu) { 328 if (length > rt->u.dst.dev->mtu) {
330 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, 329 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport,
331 rt->u.dst.dev->mtu); 330 rt->u.dst.dev->mtu);
332 return -EMSGSIZE; 331 return -EMSGSIZE;
333 } 332 }
@@ -500,10 +499,10 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
500 err = -EDESTADDRREQ; 499 err = -EDESTADDRREQ;
501 if (sk->sk_state != TCP_ESTABLISHED) 500 if (sk->sk_state != TCP_ESTABLISHED)
502 goto out; 501 goto out;
503 daddr = inet->daddr; 502 daddr = inet->inet_daddr;
504 } 503 }
505 504
506 ipc.addr = inet->saddr; 505 ipc.addr = inet->inet_saddr;
507 ipc.opt = NULL; 506 ipc.opt = NULL;
508 ipc.shtx.flags = 0; 507 ipc.shtx.flags = 0;
509 ipc.oif = sk->sk_bound_dev_if; 508 ipc.oif = sk->sk_bound_dev_if;
@@ -645,9 +644,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
645 if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL && 644 if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
646 chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) 645 chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
647 goto out; 646 goto out;
648 inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr; 647 inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;
649 if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) 648 if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
650 inet->saddr = 0; /* Use device */ 649 inet->inet_saddr = 0; /* Use device */
651 sk_dst_reset(sk); 650 sk_dst_reset(sk);
652 ret = 0; 651 ret = 0;
653out: return ret; 652out: return ret;
@@ -692,7 +691,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
692 if (err) 691 if (err)
693 goto done; 692 goto done;
694 693
695 sock_recv_timestamp(msg, sk, skb); 694 sock_recv_ts_and_drops(msg, sk, skb);
696 695
697 /* Copy the address. */ 696 /* Copy the address. */
698 if (sin) { 697 if (sin) {
@@ -717,7 +716,7 @@ static int raw_init(struct sock *sk)
717{ 716{
718 struct raw_sock *rp = raw_sk(sk); 717 struct raw_sock *rp = raw_sk(sk);
719 718
720 if (inet_sk(sk)->num == IPPROTO_ICMP) 719 if (inet_sk(sk)->inet_num == IPPROTO_ICMP)
721 memset(&rp->filter, 0, sizeof(rp->filter)); 720 memset(&rp->filter, 0, sizeof(rp->filter));
722 return 0; 721 return 0;
723} 722}
@@ -754,7 +753,7 @@ static int do_raw_setsockopt(struct sock *sk, int level, int optname,
754 char __user *optval, unsigned int optlen) 753 char __user *optval, unsigned int optlen)
755{ 754{
756 if (optname == ICMP_FILTER) { 755 if (optname == ICMP_FILTER) {
757 if (inet_sk(sk)->num != IPPROTO_ICMP) 756 if (inet_sk(sk)->inet_num != IPPROTO_ICMP)
758 return -EOPNOTSUPP; 757 return -EOPNOTSUPP;
759 else 758 else
760 return raw_seticmpfilter(sk, optval, optlen); 759 return raw_seticmpfilter(sk, optval, optlen);
@@ -784,7 +783,7 @@ static int do_raw_getsockopt(struct sock *sk, int level, int optname,
784 char __user *optval, int __user *optlen) 783 char __user *optval, int __user *optlen)
785{ 784{
786 if (optname == ICMP_FILTER) { 785 if (optname == ICMP_FILTER) {
787 if (inet_sk(sk)->num != IPPROTO_ICMP) 786 if (inet_sk(sk)->inet_num != IPPROTO_ICMP)
788 return -EOPNOTSUPP; 787 return -EOPNOTSUPP;
789 else 788 else
790 return raw_geticmpfilter(sk, optval, optlen); 789 return raw_geticmpfilter(sk, optval, optlen);
@@ -943,10 +942,10 @@ EXPORT_SYMBOL_GPL(raw_seq_stop);
943static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) 942static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
944{ 943{
945 struct inet_sock *inet = inet_sk(sp); 944 struct inet_sock *inet = inet_sk(sp);
946 __be32 dest = inet->daddr, 945 __be32 dest = inet->inet_daddr,
947 src = inet->rcv_saddr; 946 src = inet->inet_rcv_saddr;
948 __u16 destp = 0, 947 __u16 destp = 0,
949 srcp = inet->num; 948 srcp = inet->inet_num;
950 949
951 seq_printf(seq, "%4d: %08X:%04X %08X:%04X" 950 seq_printf(seq, "%4d: %08X:%04X %08X:%04X"
952 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", 951 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n",
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 5b1050a5d874..ff258b57680b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1628,9 +1628,6 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
1628 __be32 daddr = iph->daddr; 1628 __be32 daddr = iph->daddr;
1629 unsigned short est_mtu = 0; 1629 unsigned short est_mtu = 0;
1630 1630
1631 if (ipv4_config.no_pmtu_disc)
1632 return 0;
1633
1634 for (k = 0; k < 2; k++) { 1631 for (k = 0; k < 2; k++) {
1635 for (i = 0; i < 2; i++) { 1632 for (i = 0; i < 2; i++) {
1636 unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], 1633 unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index a6e0e077ac33..3146cc401748 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -276,13 +276,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
276 276
277 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); 277 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
278 278
279 /* check for timestamp cookie support */
280 memset(&tcp_opt, 0, sizeof(tcp_opt));
281 tcp_parse_options(skb, &tcp_opt, 0);
282
283 if (tcp_opt.saw_tstamp)
284 cookie_check_timestamp(&tcp_opt);
285
286 ret = NULL; 279 ret = NULL;
287 req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */ 280 req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */
288 if (!req) 281 if (!req)
@@ -298,12 +291,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
298 ireq->loc_addr = ip_hdr(skb)->daddr; 291 ireq->loc_addr = ip_hdr(skb)->daddr;
299 ireq->rmt_addr = ip_hdr(skb)->saddr; 292 ireq->rmt_addr = ip_hdr(skb)->saddr;
300 ireq->ecn_ok = 0; 293 ireq->ecn_ok = 0;
301 ireq->snd_wscale = tcp_opt.snd_wscale;
302 ireq->rcv_wscale = tcp_opt.rcv_wscale;
303 ireq->sack_ok = tcp_opt.sack_ok;
304 ireq->wscale_ok = tcp_opt.wscale_ok;
305 ireq->tstamp_ok = tcp_opt.saw_tstamp;
306 req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
307 294
308 /* We throwed the options of the initial SYN away, so we hope 295 /* We throwed the options of the initial SYN away, so we hope
309 * the ACK carries the same options again (see RFC1122 4.2.3.8) 296 * the ACK carries the same options again (see RFC1122 4.2.3.8)
@@ -333,7 +320,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
333 * no easy way to do this. 320 * no easy way to do this.
334 */ 321 */
335 { 322 {
336 struct flowi fl = { .nl_u = { .ip4_u = 323 struct flowi fl = { .mark = sk->sk_mark,
324 .nl_u = { .ip4_u =
337 { .daddr = ((opt && opt->srr) ? 325 { .daddr = ((opt && opt->srr) ?
338 opt->faddr : 326 opt->faddr :
339 ireq->rmt_addr), 327 ireq->rmt_addr),
@@ -351,6 +339,20 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
351 } 339 }
352 } 340 }
353 341
342 /* check for timestamp cookie support */
343 memset(&tcp_opt, 0, sizeof(tcp_opt));
344 tcp_parse_options(skb, &tcp_opt, 0, &rt->u.dst);
345
346 if (tcp_opt.saw_tstamp)
347 cookie_check_timestamp(&tcp_opt);
348
349 ireq->snd_wscale = tcp_opt.snd_wscale;
350 ireq->rcv_wscale = tcp_opt.rcv_wscale;
351 ireq->sack_ok = tcp_opt.sack_ok;
352 ireq->wscale_ok = tcp_opt.wscale_ok;
353 ireq->tstamp_ok = tcp_opt.saw_tstamp;
354 req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
355
354 /* Try to redo what tcp_v4_send_synack did. */ 356 /* Try to redo what tcp_v4_send_synack did. */
355 req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW); 357 req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW);
356 358
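
The syncookies change is an ordering fix rather than a behavioral one: tcp_parse_options() now takes the route's dst, so option parsing has to move below the route lookup. The resulting flow, condensed from the hunks above with error handling elided:

	/* Route the reply first (the flowi now also carries sk->sk_mark). */
	/* Only with rt in hand can the echoed options be parsed: */
	memset(&tcp_opt, 0, sizeof(tcp_opt));
	tcp_parse_options(skb, &tcp_opt, 0, &rt->u.dst);

	if (tcp_opt.saw_tstamp)
		cookie_check_timestamp(&tcp_opt);

	/* ...and only then can the request sock be seeded from them: */
	ireq->snd_wscale = tcp_opt.snd_wscale;
	ireq->rcv_wscale = tcp_opt.rcv_wscale;
	ireq->sack_ok    = tcp_opt.sack_ok;
	ireq->wscale_ok  = tcp_opt.wscale_ok;
	ireq->tstamp_ok  = tcp_opt.saw_tstamp;
	req->ts_recent   = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;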
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 98440ad82558..e0cfa633680a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2037,7 +2037,7 @@ int tcp_disconnect(struct sock *sk, int flags)
2037 __skb_queue_purge(&sk->sk_async_wait_queue); 2037 __skb_queue_purge(&sk->sk_async_wait_queue);
2038#endif 2038#endif
2039 2039
2040 inet->dport = 0; 2040 inet->inet_dport = 0;
2041 2041
2042 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) 2042 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
2043 inet_reset_saddr(sk); 2043 inet_reset_saddr(sk);
@@ -2061,7 +2061,7 @@ int tcp_disconnect(struct sock *sk, int flags)
2061 memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); 2061 memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
2062 __sk_dst_reset(sk); 2062 __sk_dst_reset(sk);
2063 2063
2064 WARN_ON(inet->num && !icsk->icsk_bind_hash); 2064 WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
2065 2065
2066 sk->sk_error_report(sk); 2066 sk->sk_error_report(sk);
2067 return err; 2067 return err;
@@ -2898,11 +2898,10 @@ void __init tcp_init(void)
2898 (totalram_pages >= 128 * 1024) ? 2898 (totalram_pages >= 128 * 1024) ?
2899 13 : 15, 2899 13 : 15,
2900 0, 2900 0,
2901 &tcp_hashinfo.ehash_size,
2902 NULL, 2901 NULL,
2902 &tcp_hashinfo.ehash_mask,
2903 thash_entries ? 0 : 512 * 1024); 2903 thash_entries ? 0 : 512 * 1024);
2904 tcp_hashinfo.ehash_size = 1 << tcp_hashinfo.ehash_size; 2904 for (i = 0; i <= tcp_hashinfo.ehash_mask; i++) {
2905 for (i = 0; i < tcp_hashinfo.ehash_size; i++) {
2906 INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i); 2905 INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i);
2907 INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].twchain, i); 2906 INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].twchain, i);
2908 } 2907 }
@@ -2911,7 +2910,7 @@ void __init tcp_init(void)
2911 tcp_hashinfo.bhash = 2910 tcp_hashinfo.bhash =
2912 alloc_large_system_hash("TCP bind", 2911 alloc_large_system_hash("TCP bind",
2913 sizeof(struct inet_bind_hashbucket), 2912 sizeof(struct inet_bind_hashbucket),
2914 tcp_hashinfo.ehash_size, 2913 tcp_hashinfo.ehash_mask + 1,
2915 (totalram_pages >= 128 * 1024) ? 2914 (totalram_pages >= 128 * 1024) ?
2916 13 : 15, 2915 13 : 15,
2917 0, 2916 0,
@@ -2966,8 +2965,8 @@ void __init tcp_init(void)
2966 sysctl_tcp_rmem[2] = max(87380, max_share); 2965 sysctl_tcp_rmem[2] = max(87380, max_share);
2967 2966
2968 printk(KERN_INFO "TCP: Hash tables configured " 2967 printk(KERN_INFO "TCP: Hash tables configured "
2969 "(established %d bind %d)\n", 2968 "(established %u bind %u)\n",
2970 tcp_hashinfo.ehash_size, tcp_hashinfo.bhash_size); 2969 tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
2971 2970
2972 tcp_register_congestion_control(&tcp_reno); 2971 tcp_register_congestion_control(&tcp_reno);
2973} 2972}
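
tcp_init() now stores the table mask instead of the size: for a power-of-two table, keeping size - 1 turns the slot computation into a single AND and makes iteration run while i <= mask. A small standalone sketch of the convention (plain C, no kernel types):

#include <stdio.h>

int main(void)
{
	unsigned int mask = (1u << 4) - 1;	/* 16 buckets, stored as 15 */
	unsigned int hash = 0xdeadbeef;
	unsigned int slot = hash & mask;	/* replaces hash % size */
	unsigned int i, buckets = 0;

	for (i = 0; i <= mask; i++)		/* the new loop bound */
		buckets++;
	printf("buckets=%u slot=%u\n", buckets, slot);	/* 16, 15 */
	return 0;
}
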
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d86784be7ab3..be0c5bf7bfca 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2300,7 +2300,7 @@ static inline int tcp_fackets_out(struct tcp_sock *tp)
2300 * they differ. Since neither occurs due to loss, TCP should really 2300 * they differ. Since neither occurs due to loss, TCP should really
2301 * ignore them. 2301 * ignore them.
2302 */ 2302 */
2303static inline int tcp_dupack_heurestics(struct tcp_sock *tp) 2303static inline int tcp_dupack_heuristics(struct tcp_sock *tp)
2304{ 2304{
2305 return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; 2305 return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
2306} 2306}
@@ -2425,7 +2425,7 @@ static int tcp_time_to_recover(struct sock *sk)
2425 return 1; 2425 return 1;
2426 2426
2427 /* Not-A-Trick#2 : Classic rule... */ 2427 /* Not-A-Trick#2 : Classic rule... */
2428 if (tcp_dupack_heurestics(tp) > tp->reordering) 2428 if (tcp_dupack_heuristics(tp) > tp->reordering)
2429 return 1; 2429 return 1;
2430 2430
2431 /* Trick#3 : when we use RFC2988 timer restart, fast 2431 /* Trick#3 : when we use RFC2988 timer restart, fast
@@ -3698,7 +3698,7 @@ old_ack:
3698 * the fast version below fails. 3698 * the fast version below fails.
3699 */ 3699 */
3700void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, 3700void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
3701 int estab) 3701 int estab, struct dst_entry *dst)
3702{ 3702{
3703 unsigned char *ptr; 3703 unsigned char *ptr;
3704 struct tcphdr *th = tcp_hdr(skb); 3704 struct tcphdr *th = tcp_hdr(skb);
@@ -3737,7 +3737,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
3737 break; 3737 break;
3738 case TCPOPT_WINDOW: 3738 case TCPOPT_WINDOW:
3739 if (opsize == TCPOLEN_WINDOW && th->syn && 3739 if (opsize == TCPOLEN_WINDOW && th->syn &&
3740 !estab && sysctl_tcp_window_scaling) { 3740 !estab && sysctl_tcp_window_scaling &&
3741 !dst_feature(dst, RTAX_FEATURE_NO_WSCALE)) {
3741 __u8 snd_wscale = *(__u8 *)ptr; 3742 __u8 snd_wscale = *(__u8 *)ptr;
3742 opt_rx->wscale_ok = 1; 3743 opt_rx->wscale_ok = 1;
3743 if (snd_wscale > 14) { 3744 if (snd_wscale > 14) {
@@ -3753,7 +3754,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
3753 case TCPOPT_TIMESTAMP: 3754 case TCPOPT_TIMESTAMP:
3754 if ((opsize == TCPOLEN_TIMESTAMP) && 3755 if ((opsize == TCPOLEN_TIMESTAMP) &&
3755 ((estab && opt_rx->tstamp_ok) || 3756 ((estab && opt_rx->tstamp_ok) ||
3756 (!estab && sysctl_tcp_timestamps))) { 3757 (!estab && sysctl_tcp_timestamps &&
3758 !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP)))) {
3757 opt_rx->saw_tstamp = 1; 3759 opt_rx->saw_tstamp = 1;
3758 opt_rx->rcv_tsval = get_unaligned_be32(ptr); 3760 opt_rx->rcv_tsval = get_unaligned_be32(ptr);
3759 opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4); 3761 opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
@@ -3761,7 +3763,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
3761 break; 3763 break;
3762 case TCPOPT_SACK_PERM: 3764 case TCPOPT_SACK_PERM:
3763 if (opsize == TCPOLEN_SACK_PERM && th->syn && 3765 if (opsize == TCPOLEN_SACK_PERM && th->syn &&
3764 !estab && sysctl_tcp_sack) { 3766 !estab && sysctl_tcp_sack &&
3767 !dst_feature(dst, RTAX_FEATURE_NO_SACK)) {
3765 opt_rx->sack_ok = 1; 3768 opt_rx->sack_ok = 1;
3766 tcp_sack_reset(opt_rx); 3769 tcp_sack_reset(opt_rx);
3767 } 3770 }
@@ -3820,7 +3823,7 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
3820 if (tcp_parse_aligned_timestamp(tp, th)) 3823 if (tcp_parse_aligned_timestamp(tp, th))
3821 return 1; 3824 return 1;
3822 } 3825 }
3823 tcp_parse_options(skb, &tp->rx_opt, 1); 3826 tcp_parse_options(skb, &tp->rx_opt, 1, NULL);
3824 return 1; 3827 return 1;
3825} 3828}
3826 3829
@@ -4075,8 +4078,10 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
4075static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) 4078static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
4076{ 4079{
4077 struct tcp_sock *tp = tcp_sk(sk); 4080 struct tcp_sock *tp = tcp_sk(sk);
4081 struct dst_entry *dst = __sk_dst_get(sk);
4078 4082
4079 if (tcp_is_sack(tp) && sysctl_tcp_dsack) { 4083 if (tcp_is_sack(tp) && sysctl_tcp_dsack &&
4084 !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) {
4080 int mib_idx; 4085 int mib_idx;
4081 4086
4082 if (before(seq, tp->rcv_nxt)) 4087 if (before(seq, tp->rcv_nxt))
@@ -4105,13 +4110,15 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
4105static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb) 4110static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
4106{ 4111{
4107 struct tcp_sock *tp = tcp_sk(sk); 4112 struct tcp_sock *tp = tcp_sk(sk);
4113 struct dst_entry *dst = __sk_dst_get(sk);
4108 4114
4109 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && 4115 if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
4110 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { 4116 before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4111 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); 4117 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4112 tcp_enter_quickack_mode(sk); 4118 tcp_enter_quickack_mode(sk);
4113 4119
4114 if (tcp_is_sack(tp) && sysctl_tcp_dsack) { 4120 if (tcp_is_sack(tp) && sysctl_tcp_dsack &&
4121 !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) {
4115 u32 end_seq = TCP_SKB_CB(skb)->end_seq; 4122 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4116 4123
4117 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) 4124 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
@@ -5364,8 +5371,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5364 struct tcp_sock *tp = tcp_sk(sk); 5371 struct tcp_sock *tp = tcp_sk(sk);
5365 struct inet_connection_sock *icsk = inet_csk(sk); 5372 struct inet_connection_sock *icsk = inet_csk(sk);
5366 int saved_clamp = tp->rx_opt.mss_clamp; 5373 int saved_clamp = tp->rx_opt.mss_clamp;
5374 struct dst_entry *dst = __sk_dst_get(sk);
5367 5375
5368 tcp_parse_options(skb, &tp->rx_opt, 0); 5376 tcp_parse_options(skb, &tp->rx_opt, 0, dst);
5369 5377
5370 if (th->ack) { 5378 if (th->ack) {
5371 /* rfc793: 5379 /* rfc793:
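
For context, tcp_parse_options() walks the TCP options as kind/length records; the new dst argument only changes which SYN options the walk will accept. A compact userspace model of that walk under the usual TCP option encoding (OPT_EOL/OPT_NOP and walk_options() are illustrative, not the kernel function):

#include <stdio.h>

#define OPT_EOL	0
#define OPT_NOP	1

static void walk_options(const unsigned char *ptr, int len)
{
	while (len > 0) {
		int kind = *ptr++, opsize;

		if (kind == OPT_EOL)
			return;
		if (kind == OPT_NOP) {		/* single-byte padding */
			len--;
			continue;
		}
		if (len < 2)
			return;
		opsize = *ptr++;
		if (opsize < 2 || opsize > len)
			return;			/* malformed: stop the walk */
		printf("option kind=%d size=%d\n", kind, opsize);
		ptr += opsize - 2;
		len -= opsize;
	}
}

int main(void)
{
	/* NOP, NOP, window scale (kind 3, len 3, shift 7) */
	const unsigned char opts[] = { 1, 1, 3, 3, 7 };

	walk_options(opts, sizeof(opts));
	return 0;
}
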
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7cda24b53f61..657ae334f125 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -165,10 +165,10 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
165 nexthop = inet->opt->faddr; 165 nexthop = inet->opt->faddr;
166 } 166 }
167 167
168 tmp = ip_route_connect(&rt, nexthop, inet->saddr, 168 tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr,
169 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, 169 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
170 IPPROTO_TCP, 170 IPPROTO_TCP,
171 inet->sport, usin->sin_port, sk, 1); 171 inet->inet_sport, usin->sin_port, sk, 1);
172 if (tmp < 0) { 172 if (tmp < 0) {
173 if (tmp == -ENETUNREACH) 173 if (tmp == -ENETUNREACH)
174 IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); 174 IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
@@ -183,11 +183,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
183 if (!inet->opt || !inet->opt->srr) 183 if (!inet->opt || !inet->opt->srr)
184 daddr = rt->rt_dst; 184 daddr = rt->rt_dst;
185 185
186 if (!inet->saddr) 186 if (!inet->inet_saddr)
187 inet->saddr = rt->rt_src; 187 inet->inet_saddr = rt->rt_src;
188 inet->rcv_saddr = inet->saddr; 188 inet->inet_rcv_saddr = inet->inet_saddr;
189 189
190 if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) { 190 if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
191 /* Reset inherited state */ 191 /* Reset inherited state */
192 tp->rx_opt.ts_recent = 0; 192 tp->rx_opt.ts_recent = 0;
193 tp->rx_opt.ts_recent_stamp = 0; 193 tp->rx_opt.ts_recent_stamp = 0;
@@ -210,8 +210,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
210 } 210 }
211 } 211 }
212 212
213 inet->dport = usin->sin_port; 213 inet->inet_dport = usin->sin_port;
214 inet->daddr = daddr; 214 inet->inet_daddr = daddr;
215 215
216 inet_csk(sk)->icsk_ext_hdr_len = 0; 216 inet_csk(sk)->icsk_ext_hdr_len = 0;
217 if (inet->opt) 217 if (inet->opt)
@@ -230,7 +230,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
230 goto failure; 230 goto failure;
231 231
232 err = ip_route_newports(&rt, IPPROTO_TCP, 232 err = ip_route_newports(&rt, IPPROTO_TCP,
233 inet->sport, inet->dport, sk); 233 inet->inet_sport, inet->inet_dport, sk);
234 if (err) 234 if (err)
235 goto failure; 235 goto failure;
236 236
@@ -239,12 +239,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
239 sk_setup_caps(sk, &rt->u.dst); 239 sk_setup_caps(sk, &rt->u.dst);
240 240
241 if (!tp->write_seq) 241 if (!tp->write_seq)
242 tp->write_seq = secure_tcp_sequence_number(inet->saddr, 242 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
243 inet->daddr, 243 inet->inet_daddr,
244 inet->sport, 244 inet->inet_sport,
245 usin->sin_port); 245 usin->sin_port);
246 246
247 inet->id = tp->write_seq ^ jiffies; 247 inet->inet_id = tp->write_seq ^ jiffies;
248 248
249 err = tcp_connect(sk); 249 err = tcp_connect(sk);
250 rt = NULL; 250 rt = NULL;
@@ -261,7 +261,7 @@ failure:
261 tcp_set_state(sk, TCP_CLOSE); 261 tcp_set_state(sk, TCP_CLOSE);
262 ip_rt_put(rt); 262 ip_rt_put(rt);
263 sk->sk_route_caps = 0; 263 sk->sk_route_caps = 0;
264 inet->dport = 0; 264 inet->inet_dport = 0;
265 return err; 265 return err;
266} 266}
267 267
@@ -520,12 +520,13 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
520 struct tcphdr *th = tcp_hdr(skb); 520 struct tcphdr *th = tcp_hdr(skb);
521 521
522 if (skb->ip_summed == CHECKSUM_PARTIAL) { 522 if (skb->ip_summed == CHECKSUM_PARTIAL) {
523 th->check = ~tcp_v4_check(len, inet->saddr, 523 th->check = ~tcp_v4_check(len, inet->inet_saddr,
524 inet->daddr, 0); 524 inet->inet_daddr, 0);
525 skb->csum_start = skb_transport_header(skb) - skb->head; 525 skb->csum_start = skb_transport_header(skb) - skb->head;
526 skb->csum_offset = offsetof(struct tcphdr, check); 526 skb->csum_offset = offsetof(struct tcphdr, check);
527 } else { 527 } else {
528 th->check = tcp_v4_check(len, inet->saddr, inet->daddr, 528 th->check = tcp_v4_check(len, inet->inet_saddr,
529 inet->inet_daddr,
529 csum_partial(th, 530 csum_partial(th,
530 th->doff << 2, 531 th->doff << 2,
531 skb->csum)); 532 skb->csum));
@@ -848,7 +849,7 @@ static struct tcp_md5sig_key *
848struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, 849struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
849 struct sock *addr_sk) 850 struct sock *addr_sk)
850{ 851{
851 return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr); 852 return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
852} 853}
853 854
854EXPORT_SYMBOL(tcp_v4_md5_lookup); 855EXPORT_SYMBOL(tcp_v4_md5_lookup);
@@ -923,7 +924,7 @@ EXPORT_SYMBOL(tcp_v4_md5_do_add);
923static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk, 924static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
924 u8 *newkey, u8 newkeylen) 925 u8 *newkey, u8 newkeylen)
925{ 926{
926 return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr, 927 return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr,
927 newkey, newkeylen); 928 newkey, newkeylen);
928} 929}
929 930
@@ -1089,8 +1090,8 @@ int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1089 __be32 saddr, daddr; 1090 __be32 saddr, daddr;
1090 1091
1091 if (sk) { 1092 if (sk) {
1092 saddr = inet_sk(sk)->saddr; 1093 saddr = inet_sk(sk)->inet_saddr;
1093 daddr = inet_sk(sk)->daddr; 1094 daddr = inet_sk(sk)->inet_daddr;
1094 } else if (req) { 1095 } else if (req) {
1095 saddr = inet_rsk(req)->loc_addr; 1096 saddr = inet_rsk(req)->loc_addr;
1096 daddr = inet_rsk(req)->rmt_addr; 1097 daddr = inet_rsk(req)->rmt_addr;
@@ -1256,11 +1257,21 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1256 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops; 1257 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1257#endif 1258#endif
1258 1259
1260 ireq = inet_rsk(req);
1261 ireq->loc_addr = daddr;
1262 ireq->rmt_addr = saddr;
1263 ireq->no_srccheck = inet_sk(sk)->transparent;
1264 ireq->opt = tcp_v4_save_options(sk, skb);
1265
1266 dst = inet_csk_route_req(sk, req);
1267 if (!dst)
1268 goto drop_and_free;
1269
1259 tcp_clear_options(&tmp_opt); 1270 tcp_clear_options(&tmp_opt);
1260 tmp_opt.mss_clamp = 536; 1271 tmp_opt.mss_clamp = 536;
1261 tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss; 1272 tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss;
1262 1273
1263 tcp_parse_options(skb, &tmp_opt, 0); 1274 tcp_parse_options(skb, &tmp_opt, 0, dst);
1264 1275
1265 if (want_cookie && !tmp_opt.saw_tstamp) 1276 if (want_cookie && !tmp_opt.saw_tstamp)
1266 tcp_clear_options(&tmp_opt); 1277 tcp_clear_options(&tmp_opt);
@@ -1269,14 +1280,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1269 1280
1270 tcp_openreq_init(req, &tmp_opt, skb); 1281 tcp_openreq_init(req, &tmp_opt, skb);
1271 1282
1272 ireq = inet_rsk(req);
1273 ireq->loc_addr = daddr;
1274 ireq->rmt_addr = saddr;
1275 ireq->no_srccheck = inet_sk(sk)->transparent;
1276 ireq->opt = tcp_v4_save_options(sk, skb);
1277
1278 if (security_inet_conn_request(sk, skb, req)) 1283 if (security_inet_conn_request(sk, skb, req))
1279 goto drop_and_free; 1284 goto drop_and_release;
1280 1285
1281 if (!want_cookie) 1286 if (!want_cookie)
1282 TCP_ECN_create_request(req, tcp_hdr(skb)); 1287 TCP_ECN_create_request(req, tcp_hdr(skb));
@@ -1301,7 +1306,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1301 */ 1306 */
1302 if (tmp_opt.saw_tstamp && 1307 if (tmp_opt.saw_tstamp &&
1303 tcp_death_row.sysctl_tw_recycle && 1308 tcp_death_row.sysctl_tw_recycle &&
1304 (dst = inet_csk_route_req(sk, req)) != NULL &&
1305 (peer = rt_get_peer((struct rtable *)dst)) != NULL && 1309 (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
1306 peer->v4daddr == saddr) { 1310 peer->v4daddr == saddr) {
1307 if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL && 1311 if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
@@ -1380,9 +1384,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1380 newtp = tcp_sk(newsk); 1384 newtp = tcp_sk(newsk);
1381 newinet = inet_sk(newsk); 1385 newinet = inet_sk(newsk);
1382 ireq = inet_rsk(req); 1386 ireq = inet_rsk(req);
1383 newinet->daddr = ireq->rmt_addr; 1387 newinet->inet_daddr = ireq->rmt_addr;
1384 newinet->rcv_saddr = ireq->loc_addr; 1388 newinet->inet_rcv_saddr = ireq->loc_addr;
1385 newinet->saddr = ireq->loc_addr; 1389 newinet->inet_saddr = ireq->loc_addr;
1386 newinet->opt = ireq->opt; 1390 newinet->opt = ireq->opt;
1387 ireq->opt = NULL; 1391 ireq->opt = NULL;
1388 newinet->mc_index = inet_iif(skb); 1392 newinet->mc_index = inet_iif(skb);
@@ -1390,7 +1394,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1390 inet_csk(newsk)->icsk_ext_hdr_len = 0; 1394 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1391 if (newinet->opt) 1395 if (newinet->opt)
1392 inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen; 1396 inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
1393 newinet->id = newtp->write_seq ^ jiffies; 1397 newinet->inet_id = newtp->write_seq ^ jiffies;
1394 1398
1395 tcp_mtup_init(newsk); 1399 tcp_mtup_init(newsk);
1396 tcp_sync_mss(newsk, dst_mtu(dst)); 1400 tcp_sync_mss(newsk, dst_mtu(dst));
@@ -1403,7 +1407,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1403 1407
1404#ifdef CONFIG_TCP_MD5SIG 1408#ifdef CONFIG_TCP_MD5SIG
1405 /* Copy over the MD5 key from the original socket */ 1409 /* Copy over the MD5 key from the original socket */
1406 if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) { 1410 key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr);
1411 if (key != NULL) {
1407 /* 1412 /*
1408 * We're using one, so create a matching key 1413 * We're using one, so create a matching key
1409 * on the newsk structure. If we fail to get 1414 * on the newsk structure. If we fail to get
@@ -1412,7 +1417,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1412 */ 1417 */
1413 char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC); 1418 char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1414 if (newkey != NULL) 1419 if (newkey != NULL)
1415 tcp_v4_md5_do_add(newsk, newinet->daddr, 1420 tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
1416 newkey, key->keylen); 1421 newkey, key->keylen);
1417 newsk->sk_route_caps &= ~NETIF_F_GSO_MASK; 1422 newsk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1418 } 1423 }
@@ -1711,8 +1716,8 @@ int tcp_v4_remember_stamp(struct sock *sk)
1711 struct inet_peer *peer = NULL; 1716 struct inet_peer *peer = NULL;
1712 int release_it = 0; 1717 int release_it = 0;
1713 1718
1714 if (!rt || rt->rt_dst != inet->daddr) { 1719 if (!rt || rt->rt_dst != inet->inet_daddr) {
1715 peer = inet_getpeer(inet->daddr, 1); 1720 peer = inet_getpeer(inet->inet_daddr, 1);
1716 release_it = 1; 1721 release_it = 1;
1717 } else { 1722 } else {
1718 if (!rt->peer) 1723 if (!rt->peer)
@@ -2000,7 +2005,7 @@ static void *established_get_first(struct seq_file *seq)
2000 struct net *net = seq_file_net(seq); 2005 struct net *net = seq_file_net(seq);
2001 void *rc = NULL; 2006 void *rc = NULL;
2002 2007
2003 for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { 2008 for (st->bucket = 0; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2004 struct sock *sk; 2009 struct sock *sk;
2005 struct hlist_nulls_node *node; 2010 struct hlist_nulls_node *node;
2006 struct inet_timewait_sock *tw; 2011 struct inet_timewait_sock *tw;
@@ -2061,10 +2066,10 @@ get_tw:
2061 st->state = TCP_SEQ_STATE_ESTABLISHED; 2066 st->state = TCP_SEQ_STATE_ESTABLISHED;
2062 2067
2063 /* Look for next non empty bucket */ 2068 /* Look for next non empty bucket */
2064 while (++st->bucket < tcp_hashinfo.ehash_size && 2069 while (++st->bucket <= tcp_hashinfo.ehash_mask &&
2065 empty_bucket(st)) 2070 empty_bucket(st))
2066 ; 2071 ;
2067 if (st->bucket >= tcp_hashinfo.ehash_size) 2072 if (st->bucket > tcp_hashinfo.ehash_mask)
2068 return NULL; 2073 return NULL;
2069 2074
2070 spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); 2075 spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
@@ -2225,7 +2230,7 @@ static void get_openreq4(struct sock *sk, struct request_sock *req,
2225 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n", 2230 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n",
2226 i, 2231 i,
2227 ireq->loc_addr, 2232 ireq->loc_addr,
2228 ntohs(inet_sk(sk)->sport), 2233 ntohs(inet_sk(sk)->inet_sport),
2229 ireq->rmt_addr, 2234 ireq->rmt_addr,
2230 ntohs(ireq->rmt_port), 2235 ntohs(ireq->rmt_port),
2231 TCP_SYN_RECV, 2236 TCP_SYN_RECV,
@@ -2248,10 +2253,10 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2248 struct tcp_sock *tp = tcp_sk(sk); 2253 struct tcp_sock *tp = tcp_sk(sk);
2249 const struct inet_connection_sock *icsk = inet_csk(sk); 2254 const struct inet_connection_sock *icsk = inet_csk(sk);
2250 struct inet_sock *inet = inet_sk(sk); 2255 struct inet_sock *inet = inet_sk(sk);
2251 __be32 dest = inet->daddr; 2256 __be32 dest = inet->inet_daddr;
2252 __be32 src = inet->rcv_saddr; 2257 __be32 src = inet->inet_rcv_saddr;
2253 __u16 destp = ntohs(inet->dport); 2258 __u16 destp = ntohs(inet->inet_dport);
2254 __u16 srcp = ntohs(inet->sport); 2259 __u16 srcp = ntohs(inet->inet_sport);
2255 2260
2256 if (icsk->icsk_pending == ICSK_TIME_RETRANS) { 2261 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2257 timer_active = 1; 2262 timer_active = 1;
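
Moving inet_csk_route_req() ahead of the security hook changes the error unwinding: a failure after the route is taken must also release it, which is why the hook's failure path now jumps to drop_and_release instead of drop_and_free. A minimal sketch of that goto-unwind pattern (route_req, release, and security_hook_fails are stand-ins, not kernel calls):

#include <stdio.h>
#include <stdlib.h>

struct res { int dummy; };

static struct res *route_req(void)	{ return malloc(sizeof(struct res)); }
static void release(struct res *r)	{ free(r); }
static int security_hook_fails(void)	{ return 1; }

static int conn_request(void)
{
	struct res *dst = route_req();	/* now taken before the hook */

	if (!dst)
		goto drop;
	if (security_hook_fails())
		goto drop_and_release;	/* must undo the earlier acquisition */
	release(dst);
	return 0;

drop_and_release:
	release(dst);
drop:
	return -1;
}

int main(void)
{
	printf("conn_request() = %d\n", conn_request());	/* -1, unwound */
	return 0;
}
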
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 4c03598ed924..a9d34e224cb6 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -100,9 +100,9 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
100 struct tcp_options_received tmp_opt; 100 struct tcp_options_received tmp_opt;
101 int paws_reject = 0; 101 int paws_reject = 0;
102 102
103 tmp_opt.saw_tstamp = 0;
104 if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { 103 if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
105 tcp_parse_options(skb, &tmp_opt, 0); 104 tmp_opt.tstamp_ok = 1;
105 tcp_parse_options(skb, &tmp_opt, 1, NULL);
106 106
107 if (tmp_opt.saw_tstamp) { 107 if (tmp_opt.saw_tstamp) {
108 tmp_opt.ts_recent = tcptw->tw_ts_recent; 108 tmp_opt.ts_recent = tcptw->tw_ts_recent;
@@ -501,9 +501,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
501 struct tcp_options_received tmp_opt; 501 struct tcp_options_received tmp_opt;
502 struct sock *child; 502 struct sock *child;
503 503
504 tmp_opt.saw_tstamp = 0; 504 if ((th->doff > (sizeof(struct tcphdr)>>2)) && (req->ts_recent)) {
505 if (th->doff > (sizeof(struct tcphdr)>>2)) { 505 tmp_opt.tstamp_ok = 1;
506 tcp_parse_options(skb, &tmp_opt, 0); 506 tcp_parse_options(skb, &tmp_opt, 1, NULL);
507 507
508 if (tmp_opt.saw_tstamp) { 508 if (tmp_opt.saw_tstamp) {
509 tmp_opt.ts_recent = req->ts_recent; 509 tmp_opt.ts_recent = req->ts_recent;
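
The tmp_opt.tstamp_ok = 1 seed paired with estab = 1 is a deliberate trick: with estab set, SYN-only options such as window scale are skipped, and the timestamp branch fires only because the flag was pre-seeded. A toy model of that control flow (parse() is a stand-in, not tcp_parse_options()):

#include <stdio.h>

struct opts {
	int tstamp_ok, saw_tstamp, wscale_ok;
};

static void parse(struct opts *o, int estab, int has_ts, int has_ws)
{
	if (has_ws && !estab)			/* SYN-only option */
		o->wscale_ok = 1;
	if (has_ts && (estab ? o->tstamp_ok : 1))
		o->saw_tstamp = 1;		/* needs the pre-seeded flag */
}

int main(void)
{
	struct opts o = { .tstamp_ok = 1 };	/* the pre-seed */

	parse(&o, 1, 1, 1);
	printf("ts=%d ws=%d\n", o.saw_tstamp, o.wscale_ok);	/* ts=1 ws=0 */
	return 0;
}
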
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index fcd278a7080e..616c686ca253 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -464,6 +464,7 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
464 struct tcp_md5sig_key **md5) { 464 struct tcp_md5sig_key **md5) {
465 struct tcp_sock *tp = tcp_sk(sk); 465 struct tcp_sock *tp = tcp_sk(sk);
466 unsigned size = 0; 466 unsigned size = 0;
467 struct dst_entry *dst = __sk_dst_get(sk);
467 468
468#ifdef CONFIG_TCP_MD5SIG 469#ifdef CONFIG_TCP_MD5SIG
469 *md5 = tp->af_specific->md5_lookup(sk, sk); 470 *md5 = tp->af_specific->md5_lookup(sk, sk);
@@ -487,18 +488,22 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
487 opts->mss = tcp_advertise_mss(sk); 488 opts->mss = tcp_advertise_mss(sk);
488 size += TCPOLEN_MSS_ALIGNED; 489 size += TCPOLEN_MSS_ALIGNED;
489 490
490 if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { 491 if (likely(sysctl_tcp_timestamps &&
492 !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP) &&
493 *md5 == NULL)) {
491 opts->options |= OPTION_TS; 494 opts->options |= OPTION_TS;
492 opts->tsval = TCP_SKB_CB(skb)->when; 495 opts->tsval = TCP_SKB_CB(skb)->when;
493 opts->tsecr = tp->rx_opt.ts_recent; 496 opts->tsecr = tp->rx_opt.ts_recent;
494 size += TCPOLEN_TSTAMP_ALIGNED; 497 size += TCPOLEN_TSTAMP_ALIGNED;
495 } 498 }
496 if (likely(sysctl_tcp_window_scaling)) { 499 if (likely(sysctl_tcp_window_scaling &&
500 !dst_feature(dst, RTAX_FEATURE_NO_WSCALE))) {
497 opts->ws = tp->rx_opt.rcv_wscale; 501 opts->ws = tp->rx_opt.rcv_wscale;
498 opts->options |= OPTION_WSCALE; 502 opts->options |= OPTION_WSCALE;
499 size += TCPOLEN_WSCALE_ALIGNED; 503 size += TCPOLEN_WSCALE_ALIGNED;
500 } 504 }
501 if (likely(sysctl_tcp_sack)) { 505 if (likely(sysctl_tcp_sack &&
506 !dst_feature(dst, RTAX_FEATURE_NO_SACK))) {
502 opts->options |= OPTION_SACK_ADVERTISE; 507 opts->options |= OPTION_SACK_ADVERTISE;
503 if (unlikely(!(OPTION_TS & opts->options))) 508 if (unlikely(!(OPTION_TS & opts->options)))
504 size += TCPOLEN_SACKPERM_ALIGNED; 509 size += TCPOLEN_SACKPERM_ALIGNED;
@@ -661,8 +666,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
661 666
662 /* Build TCP header and checksum it. */ 667 /* Build TCP header and checksum it. */
663 th = tcp_hdr(skb); 668 th = tcp_hdr(skb);
664 th->source = inet->sport; 669 th->source = inet->inet_sport;
665 th->dest = inet->dport; 670 th->dest = inet->inet_dport;
666 th->seq = htonl(tcb->seq); 671 th->seq = htonl(tcb->seq);
667 th->ack_seq = htonl(tp->rcv_nxt); 672 th->ack_seq = htonl(tp->rcv_nxt);
668 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | 673 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
@@ -2315,7 +2320,9 @@ static void tcp_connect_init(struct sock *sk)
2315 * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT. 2320 * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
2316 */ 2321 */
2317 tp->tcp_header_len = sizeof(struct tcphdr) + 2322 tp->tcp_header_len = sizeof(struct tcphdr) +
2318 (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0); 2323 (sysctl_tcp_timestamps &&
2324 (!dst_feature(dst, RTAX_FEATURE_NO_TSTAMP) ?
2325 TCPOLEN_TSTAMP_ALIGNED : 0));
2319 2326
2320#ifdef CONFIG_TCP_MD5SIG 2327#ifdef CONFIG_TCP_MD5SIG
2321 if (tp->af_specific->md5_lookup(sk, sk) != NULL) 2328 if (tp->af_specific->md5_lookup(sk, sk) != NULL)
@@ -2341,7 +2348,8 @@ static void tcp_connect_init(struct sock *sk)
2341 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), 2348 tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
2342 &tp->rcv_wnd, 2349 &tp->rcv_wnd,
2343 &tp->window_clamp, 2350 &tp->window_clamp,
2344 sysctl_tcp_window_scaling, 2351 (sysctl_tcp_window_scaling &&
2352 !dst_feature(dst, RTAX_FEATURE_NO_WSCALE)),
2345 &rcv_wscale); 2353 &rcv_wscale);
2346 2354
2347 tp->rx_opt.rcv_wscale = rcv_wscale; 2355 tp->rx_opt.rcv_wscale = rcv_wscale;
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 59f5b5e7c566..7a3cc2ffad84 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -94,7 +94,8 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
94 const struct inet_sock *inet = inet_sk(sk); 94 const struct inet_sock *inet = inet_sk(sk);
95 95
96 /* Only update if port matches */ 96 /* Only update if port matches */
97 if ((port == 0 || ntohs(inet->dport) == port || ntohs(inet->sport) == port) 97 if ((port == 0 || ntohs(inet->inet_dport) == port ||
98 ntohs(inet->inet_sport) == port)
98 && (full || tp->snd_cwnd != tcp_probe.lastcwnd)) { 99 && (full || tp->snd_cwnd != tcp_probe.lastcwnd)) {
99 100
100 spin_lock(&tcp_probe.lock); 101 spin_lock(&tcp_probe.lock);
@@ -103,10 +104,10 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
103 struct tcp_log *p = tcp_probe.log + tcp_probe.head; 104 struct tcp_log *p = tcp_probe.log + tcp_probe.head;
104 105
105 p->tstamp = ktime_get(); 106 p->tstamp = ktime_get();
106 p->saddr = inet->saddr; 107 p->saddr = inet->inet_saddr;
107 p->sport = inet->sport; 108 p->sport = inet->inet_sport;
108 p->daddr = inet->daddr; 109 p->daddr = inet->inet_daddr;
109 p->dport = inet->dport; 110 p->dport = inet->inet_dport;
110 p->length = skb->len; 111 p->length = skb->len;
111 p->snd_nxt = tp->snd_nxt; 112 p->snd_nxt = tp->snd_nxt;
112 p->snd_una = tp->snd_una; 113 p->snd_una = tp->snd_una;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index cdb2ca7684d4..8353a538cd4c 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -141,14 +141,14 @@ static int tcp_write_timeout(struct sock *sk)
141 141
142 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { 142 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
143 if (icsk->icsk_retransmits) 143 if (icsk->icsk_retransmits)
144 dst_negative_advice(&sk->sk_dst_cache); 144 dst_negative_advice(&sk->sk_dst_cache, sk);
145 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; 145 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
146 } else { 146 } else {
147 if (retransmits_timed_out(sk, sysctl_tcp_retries1)) { 147 if (retransmits_timed_out(sk, sysctl_tcp_retries1)) {
148 /* Black hole detection */ 148 /* Black hole detection */
149 tcp_mtu_probing(icsk, sk); 149 tcp_mtu_probing(icsk, sk);
150 150
151 dst_negative_advice(&sk->sk_dst_cache); 151 dst_negative_advice(&sk->sk_dst_cache, sk);
152 } 152 }
153 153
154 retry_until = sysctl_tcp_retries2; 154 retry_until = sysctl_tcp_retries2;
@@ -303,15 +303,15 @@ void tcp_retransmit_timer(struct sock *sk)
303 struct inet_sock *inet = inet_sk(sk); 303 struct inet_sock *inet = inet_sk(sk);
304 if (sk->sk_family == AF_INET) { 304 if (sk->sk_family == AF_INET) {
305 LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", 305 LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
306 &inet->daddr, ntohs(inet->dport), 306 &inet->inet_daddr, ntohs(inet->inet_dport),
307 inet->num, tp->snd_una, tp->snd_nxt); 307 inet->inet_num, tp->snd_una, tp->snd_nxt);
308 } 308 }
309#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 309#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
310 else if (sk->sk_family == AF_INET6) { 310 else if (sk->sk_family == AF_INET6) {
311 struct ipv6_pinfo *np = inet6_sk(sk); 311 struct ipv6_pinfo *np = inet6_sk(sk);
312 LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", 312 LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
313 &np->daddr, ntohs(inet->dport), 313 &np->daddr, ntohs(inet->inet_dport),
314 inet->num, tp->snd_una, tp->snd_nxt); 314 inet->inet_num, tp->snd_una, tp->snd_nxt);
315 } 315 }
316#endif 316#endif
317#endif 317#endif
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0fa9f70e4b19..d73e9170536b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -106,7 +106,7 @@
106#include <net/xfrm.h> 106#include <net/xfrm.h>
107#include "udp_impl.h" 107#include "udp_impl.h"
108 108
109struct udp_table udp_table; 109struct udp_table udp_table __read_mostly;
110EXPORT_SYMBOL(udp_table); 110EXPORT_SYMBOL(udp_table);
111 111
112int sysctl_udp_mem[3] __read_mostly; 112int sysctl_udp_mem[3] __read_mostly;
@@ -121,14 +121,16 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min);
121atomic_t udp_memory_allocated; 121atomic_t udp_memory_allocated;
122EXPORT_SYMBOL(udp_memory_allocated); 122EXPORT_SYMBOL(udp_memory_allocated);
123 123
124#define PORTS_PER_CHAIN (65536 / UDP_HTABLE_SIZE) 124#define MAX_UDP_PORTS 65536
125#define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN)
125 126
126static int udp_lib_lport_inuse(struct net *net, __u16 num, 127static int udp_lib_lport_inuse(struct net *net, __u16 num,
127 const struct udp_hslot *hslot, 128 const struct udp_hslot *hslot,
128 unsigned long *bitmap, 129 unsigned long *bitmap,
129 struct sock *sk, 130 struct sock *sk,
130 int (*saddr_comp)(const struct sock *sk1, 131 int (*saddr_comp)(const struct sock *sk1,
131 const struct sock *sk2)) 132 const struct sock *sk2),
133 unsigned int log)
132{ 134{
133 struct sock *sk2; 135 struct sock *sk2;
134 struct hlist_nulls_node *node; 136 struct hlist_nulls_node *node;
@@ -136,13 +138,13 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
136 sk_nulls_for_each(sk2, node, &hslot->head) 138 sk_nulls_for_each(sk2, node, &hslot->head)
137 if (net_eq(sock_net(sk2), net) && 139 if (net_eq(sock_net(sk2), net) &&
138 sk2 != sk && 140 sk2 != sk &&
139 (bitmap || sk2->sk_hash == num) && 141 (bitmap || udp_sk(sk2)->udp_port_hash == num) &&
140 (!sk2->sk_reuse || !sk->sk_reuse) && 142 (!sk2->sk_reuse || !sk->sk_reuse) &&
141 (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if 143 (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
142 || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && 144 || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
143 (*saddr_comp)(sk, sk2)) { 145 (*saddr_comp)(sk, sk2)) {
144 if (bitmap) 146 if (bitmap)
145 __set_bit(sk2->sk_hash / UDP_HTABLE_SIZE, 147 __set_bit(udp_sk(sk2)->udp_port_hash >> log,
146 bitmap); 148 bitmap);
147 else 149 else
148 return 1; 150 return 1;
@@ -161,7 +163,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
161 int (*saddr_comp)(const struct sock *sk1, 163 int (*saddr_comp)(const struct sock *sk1,
162 const struct sock *sk2)) 164 const struct sock *sk2))
163{ 165{
164 struct udp_hslot *hslot; 166 struct udp_hslot *hslot, *hslot2;
165 struct udp_table *udptable = sk->sk_prot->h.udp_table; 167 struct udp_table *udptable = sk->sk_prot->h.udp_table;
166 int error = 1; 168 int error = 1;
167 struct net *net = sock_net(sk); 169 struct net *net = sock_net(sk);
@@ -180,13 +182,15 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
180 /* 182 /*
181 * force rand to be an odd multiple of UDP_HTABLE_SIZE 183 * force rand to be an odd multiple of UDP_HTABLE_SIZE
182 */ 184 */
183 rand = (rand | 1) * UDP_HTABLE_SIZE; 185 rand = (rand | 1) * (udptable->mask + 1);
184 for (last = first + UDP_HTABLE_SIZE; first != last; first++) { 186 for (last = first + udptable->mask + 1;
185 hslot = &udptable->hash[udp_hashfn(net, first)]; 187 first != last;
188 first++) {
189 hslot = udp_hashslot(udptable, net, first);
186 bitmap_zero(bitmap, PORTS_PER_CHAIN); 190 bitmap_zero(bitmap, PORTS_PER_CHAIN);
187 spin_lock_bh(&hslot->lock); 191 spin_lock_bh(&hslot->lock);
188 udp_lib_lport_inuse(net, snum, hslot, bitmap, sk, 192 udp_lib_lport_inuse(net, snum, hslot, bitmap, sk,
189 saddr_comp); 193 saddr_comp, udptable->log);
190 194
191 snum = first; 195 snum = first;
192 /* 196 /*
@@ -196,7 +200,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
196 */ 200 */
197 do { 201 do {
198 if (low <= snum && snum <= high && 202 if (low <= snum && snum <= high &&
199 !test_bit(snum / UDP_HTABLE_SIZE, bitmap)) 203 !test_bit(snum >> udptable->log, bitmap))
200 goto found; 204 goto found;
201 snum += rand; 205 snum += rand;
202 } while (snum != first); 206 } while (snum != first);
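
The probe stride (rand | 1) * (udptable->mask + 1) is an odd multiple of the table size: every snum += rand stays in the same hash slot, yet because an odd stride is coprime with 2^16 the walk still visits each port that maps to that slot before repeating. A standalone check of both properties (the numbers are arbitrary):

#include <stdio.h>

int main(void)
{
	unsigned int mask = 127;			/* 128-slot table */
	unsigned int rnd = 0x1234;
	unsigned int stride = (rnd | 1) * (mask + 1);	/* odd multiple of size */
	unsigned short first = 40000, snum = first;
	unsigned int visited = 0;

	do {
		if ((snum & mask) != (first & mask))
			return 1;	/* cannot happen: stride % size == 0 */
		visited++;
		snum += stride;		/* wraps mod 65536 */
	} while (snum != first);

	printf("visited %u ports, all in one slot\n", visited);	/* 512 */
	return 0;
}
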
@@ -204,17 +208,27 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
204 } 208 }
205 goto fail; 209 goto fail;
206 } else { 210 } else {
207 hslot = &udptable->hash[udp_hashfn(net, snum)]; 211 hslot = udp_hashslot(udptable, net, snum);
208 spin_lock_bh(&hslot->lock); 212 spin_lock_bh(&hslot->lock);
209 if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, saddr_comp)) 213 if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk,
214 saddr_comp, 0))
210 goto fail_unlock; 215 goto fail_unlock;
211 } 216 }
212found: 217found:
213 inet_sk(sk)->num = snum; 218 inet_sk(sk)->inet_num = snum;
214 sk->sk_hash = snum; 219 udp_sk(sk)->udp_port_hash = snum;
220 udp_sk(sk)->udp_portaddr_hash ^= snum;
215 if (sk_unhashed(sk)) { 221 if (sk_unhashed(sk)) {
216 sk_nulls_add_node_rcu(sk, &hslot->head); 222 sk_nulls_add_node_rcu(sk, &hslot->head);
223 hslot->count++;
217 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 224 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
225
226 hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
227 spin_lock(&hslot2->lock);
228 hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
229 &hslot2->head);
230 hslot2->count++;
231 spin_unlock(&hslot2->lock);
218 } 232 }
219 error = 0; 233 error = 0;
220fail_unlock: 234fail_unlock:
@@ -229,12 +243,23 @@ static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
229 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); 243 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
230 244
231 return (!ipv6_only_sock(sk2) && 245 return (!ipv6_only_sock(sk2) &&
232 (!inet1->rcv_saddr || !inet2->rcv_saddr || 246 (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr ||
233 inet1->rcv_saddr == inet2->rcv_saddr)); 247 inet1->inet_rcv_saddr == inet2->inet_rcv_saddr));
248}
249
250static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr,
251 unsigned int port)
252{
253 return jhash_1word(saddr, net_hash_mix(net)) ^ port;
234} 254}
235 255
236int udp_v4_get_port(struct sock *sk, unsigned short snum) 256int udp_v4_get_port(struct sock *sk, unsigned short snum)
237{ 257{
258 /* precompute partial secondary hash */
259 udp_sk(sk)->udp_portaddr_hash =
260 udp4_portaddr_hash(sock_net(sk),
261 inet_sk(sk)->inet_rcv_saddr,
262 0);
238 return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal); 263 return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal);
239} 264}
240 265
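
The udp_portaddr_hash precomputation works because udp4_portaddr_hash() XORs the port in last: hash(saddr, 0) can be computed at bind entry, and the chosen port is folded in once known. A sketch of that identity, with mix32() standing in for jhash_1word()/net_hash_mix():

#include <stdio.h>
#include <stdint.h>

/* stand-in for jhash_1word(saddr, net_hash_mix(net)) */
static uint32_t mix32(uint32_t a)
{
	a ^= a >> 16;
	a *= 0x7feb352d;
	a ^= a >> 15;
	return a;
}

static uint32_t portaddr_hash(uint32_t saddr, uint16_t port)
{
	return mix32(saddr) ^ port;	/* port folded in by XOR */
}

int main(void)
{
	uint32_t partial = portaddr_hash(0x7f000001, 0);	/* at bind entry */
	uint16_t snum = 4242;					/* chosen later */

	printf("match=%d\n",
	       (partial ^ snum) == portaddr_hash(0x7f000001, snum));	/* 1 */
	return 0;
}
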
@@ -244,23 +269,61 @@ static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr,
244{ 269{
245 int score = -1; 270 int score = -1;
246 271
247 if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && 272 if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
248 !ipv6_only_sock(sk)) { 273 !ipv6_only_sock(sk)) {
249 struct inet_sock *inet = inet_sk(sk); 274 struct inet_sock *inet = inet_sk(sk);
250 275
251 score = (sk->sk_family == PF_INET ? 1 : 0); 276 score = (sk->sk_family == PF_INET ? 1 : 0);
252 if (inet->rcv_saddr) { 277 if (inet->inet_rcv_saddr) {
253 if (inet->rcv_saddr != daddr) 278 if (inet->inet_rcv_saddr != daddr)
279 return -1;
280 score += 2;
281 }
282 if (inet->inet_daddr) {
283 if (inet->inet_daddr != saddr)
284 return -1;
285 score += 2;
286 }
287 if (inet->inet_dport) {
288 if (inet->inet_dport != sport)
289 return -1;
290 score += 2;
291 }
292 if (sk->sk_bound_dev_if) {
293 if (sk->sk_bound_dev_if != dif)
254 return -1; 294 return -1;
255 score += 2; 295 score += 2;
256 } 296 }
257 if (inet->daddr) { 297 }
258 if (inet->daddr != saddr) 298 return score;
299}
300
301/*
302 * In this second variant, we check (daddr, dport) matches (inet_rcv_saddr, inet_num)
303 */
304#define SCORE2_MAX (1 + 2 + 2 + 2)
305static inline int compute_score2(struct sock *sk, struct net *net,
306 __be32 saddr, __be16 sport,
307 __be32 daddr, unsigned int hnum, int dif)
308{
309 int score = -1;
310
311 if (net_eq(sock_net(sk), net) && !ipv6_only_sock(sk)) {
312 struct inet_sock *inet = inet_sk(sk);
313
314 if (inet->inet_rcv_saddr != daddr)
315 return -1;
316 if (inet->inet_num != hnum)
317 return -1;
318
319 score = (sk->sk_family == PF_INET ? 1 : 0);
320 if (inet->inet_daddr) {
321 if (inet->inet_daddr != saddr)
259 return -1; 322 return -1;
260 score += 2; 323 score += 2;
261 } 324 }
262 if (inet->dport) { 325 if (inet->inet_dport) {
263 if (inet->dport != sport) 326 if (inet->inet_dport != sport)
264 return -1; 327 return -1;
265 score += 2; 328 score += 2;
266 } 329 }
@@ -273,6 +336,53 @@ static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr,
273 return score; 336 return score;
274} 337}
275 338
339#define udp_portaddr_for_each_entry_rcu(__sk, node, list) \
340 hlist_nulls_for_each_entry_rcu(__sk, node, list, __sk_common.skc_portaddr_node)
341
342/* called with rcu_read_lock() */
343static struct sock *udp4_lib_lookup2(struct net *net,
344 __be32 saddr, __be16 sport,
345 __be32 daddr, unsigned int hnum, int dif,
346 struct udp_hslot *hslot2, unsigned int slot2)
347{
348 struct sock *sk, *result;
349 struct hlist_nulls_node *node;
350 int score, badness;
351
352begin:
353 result = NULL;
354 badness = -1;
355 udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) {
356 score = compute_score2(sk, net, saddr, sport,
357 daddr, hnum, dif);
358 if (score > badness) {
359 result = sk;
360 badness = score;
361 if (score == SCORE2_MAX)
362 goto exact_match;
363 }
364 }
365 /*
366 * if the nulls value we got at the end of this lookup is
367 * not the expected one, we must restart lookup.
368 * We probably met an item that was moved to another chain.
369 */
370 if (get_nulls_value(node) != slot2)
371 goto begin;
372
373 if (result) {
374exact_match:
375 if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
376 result = NULL;
377 else if (unlikely(compute_score2(result, net, saddr, sport,
378 daddr, hnum, dif) < badness)) {
379 sock_put(result);
380 goto begin;
381 }
382 }
383 return result;
384}
385
276/* UDP is nearly always wildcards out the wazoo, it makes no sense to try 386/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
277 * harder than this. -DaveM 387 * harder than this. -DaveM
278 */ 388 */
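
The get_nulls_value(node) != slot2 restart in udp4_lib_lookup2() relies on each chain's terminator encoding its slot number: a lockless walker that ends on a foreign terminator knows an item was rehashed under it mid-walk and retries. A single-threaded model of the tagged terminator (MAKE_NULLS and friends are illustrative, not the hlist_nulls API):

#include <stdio.h>
#include <stdint.h>

/* a "nulls" terminator encodes the slot number in a tagged pointer */
#define MAKE_NULLS(slot) ((struct node *)((((uintptr_t)(slot)) << 1) | 1))
#define IS_NULLS(p)      (((uintptr_t)(p)) & 1)
#define NULLS_VALUE(p)   ((unsigned int)(((uintptr_t)(p)) >> 1))

struct node {
	struct node *next;
	int key;
};

/* returns the match or NULL; *restart set if the walk ended on a
 * terminator that is not ours (item moved to another chain mid-walk) */
static struct node *lookup(struct node *head, int key,
			   unsigned int slot, int *restart)
{
	struct node *n;

	*restart = 0;
	for (n = head; !IS_NULLS(n); n = n->next)
		if (n->key == key)
			return n;
	if (NULLS_VALUE(n) != slot)
		*restart = 1;
	return NULL;
}

int main(void)
{
	struct node b = { MAKE_NULLS(5), 2 };
	struct node a = { &b, 1 };
	int restart;

	lookup(&a, 99, 5, &restart);
	printf("restart=%d\n", restart);	/* 0: right terminator */
	lookup(&a, 99, 7, &restart);
	printf("restart=%d\n", restart);	/* 1: wrong chain, retry */
	return 0;
}
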
@@ -283,11 +393,35 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
283 struct sock *sk, *result; 393 struct sock *sk, *result;
284 struct hlist_nulls_node *node; 394 struct hlist_nulls_node *node;
285 unsigned short hnum = ntohs(dport); 395 unsigned short hnum = ntohs(dport);
286 unsigned int hash = udp_hashfn(net, hnum); 396 unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
287 struct udp_hslot *hslot = &udptable->hash[hash]; 397 struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
288 int score, badness; 398 int score, badness;
289 399
290 rcu_read_lock(); 400 rcu_read_lock();
401 if (hslot->count > 10) {
402 hash2 = udp4_portaddr_hash(net, daddr, hnum);
403 slot2 = hash2 & udptable->mask;
404 hslot2 = &udptable->hash2[slot2];
405 if (hslot->count < hslot2->count)
406 goto begin;
407
408 result = udp4_lib_lookup2(net, saddr, sport,
409 daddr, hnum, dif,
410 hslot2, slot2);
411 if (!result) {
412 hash2 = udp4_portaddr_hash(net, INADDR_ANY, hnum);
413 slot2 = hash2 & udptable->mask;
414 hslot2 = &udptable->hash2[slot2];
415 if (hslot->count < hslot2->count)
416 goto begin;
417
418 result = udp4_lib_lookup2(net, INADDR_ANY, sport,
419 daddr, hnum, dif,
420 hslot2, slot2);
421 }
422 rcu_read_unlock();
423 return result;
424 }
291begin: 425begin:
292 result = NULL; 426 result = NULL;
293 badness = -1; 427 badness = -1;
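
The hslot->count > 10 branch above is a heuristic: fall back to the new (address, port) table only when the port-only chain is long, and only if the secondary chain is not itself longer. A tiny model of that selection (threshold and names follow the shape of the code, not an exact transcription):

#include <stdio.h>

static const char *pick_table(unsigned int port_chain, unsigned int addr_chain)
{
	if (port_chain > 10 && addr_chain <= port_chain)
		return "hash2 (addr+port)";
	return "hash (port only)";
}

int main(void)
{
	printf("%s\n", pick_table(50, 3));	/* long chain -> secondary */
	printf("%s\n", pick_table(4, 3));	/* short chain -> linear scan */
	return 0;
}
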
@@ -304,7 +438,7 @@ begin:
304 * not the expected one, we must restart lookup. 438 * not the expected one, we must restart lookup.
305 * We probably met an item that was moved to another chain. 439 * We probably met an item that was moved to another chain.
306 */ 440 */
307 if (get_nulls_value(node) != hash) 441 if (get_nulls_value(node) != slot)
308 goto begin; 442 goto begin;
309 443
310 if (result) { 444 if (result) {
@@ -355,10 +489,11 @@ static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk,
355 struct inet_sock *inet = inet_sk(s); 489 struct inet_sock *inet = inet_sk(s);
356 490
357 if (!net_eq(sock_net(s), net) || 491 if (!net_eq(sock_net(s), net) ||
358 s->sk_hash != hnum || 492 udp_sk(s)->udp_port_hash != hnum ||
359 (inet->daddr && inet->daddr != rmt_addr) || 493 (inet->inet_daddr && inet->inet_daddr != rmt_addr) ||
360 (inet->dport != rmt_port && inet->dport) || 494 (inet->inet_dport != rmt_port && inet->inet_dport) ||
361 (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || 495 (inet->inet_rcv_saddr &&
496 inet->inet_rcv_saddr != loc_addr) ||
362 ipv6_only_sock(s) || 497 ipv6_only_sock(s) ||
363 (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) 498 (s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
364 continue; 499 continue;
@@ -642,14 +777,14 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
642 } else { 777 } else {
643 if (sk->sk_state != TCP_ESTABLISHED) 778 if (sk->sk_state != TCP_ESTABLISHED)
644 return -EDESTADDRREQ; 779 return -EDESTADDRREQ;
645 daddr = inet->daddr; 780 daddr = inet->inet_daddr;
646 dport = inet->dport; 781 dport = inet->inet_dport;
647 /* Open fast path for connected socket. 782 /* Open fast path for connected socket.
648 Route will not be used, if at least one option is set. 783 Route will not be used, if at least one option is set.
649 */ 784 */
650 connected = 1; 785 connected = 1;
651 } 786 }
652 ipc.addr = inet->saddr; 787 ipc.addr = inet->inet_saddr;
653 788
654 ipc.oif = sk->sk_bound_dev_if; 789 ipc.oif = sk->sk_bound_dev_if;
655 err = sock_tx_timestamp(msg, sk, &ipc.shtx); 790 err = sock_tx_timestamp(msg, sk, &ipc.shtx);
@@ -704,7 +839,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
704 .proto = sk->sk_protocol, 839 .proto = sk->sk_protocol,
705 .flags = inet_sk_flowi_flags(sk), 840 .flags = inet_sk_flowi_flags(sk),
706 .uli_u = { .ports = 841 .uli_u = { .ports =
707 { .sport = inet->sport, 842 { .sport = inet->inet_sport,
708 .dport = dport } } }; 843 .dport = dport } } };
709 struct net *net = sock_net(sk); 844 struct net *net = sock_net(sk);
710 845
@@ -748,7 +883,7 @@ back_from_confirm:
748 inet->cork.fl.fl4_dst = daddr; 883 inet->cork.fl.fl4_dst = daddr;
749 inet->cork.fl.fl_ip_dport = dport; 884 inet->cork.fl.fl_ip_dport = dport;
750 inet->cork.fl.fl4_src = saddr; 885 inet->cork.fl.fl4_src = saddr;
751 inet->cork.fl.fl_ip_sport = inet->sport; 886 inet->cork.fl.fl_ip_sport = inet->inet_sport;
752 up->pending = AF_INET; 887 up->pending = AF_INET;
753 888
754do_append_data: 889do_append_data:
@@ -862,6 +997,7 @@ static unsigned int first_packet_length(struct sock *sk)
862 udp_lib_checksum_complete(skb)) { 997 udp_lib_checksum_complete(skb)) {
863 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, 998 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS,
864 IS_UDPLITE(sk)); 999 IS_UDPLITE(sk));
1000 atomic_inc(&sk->sk_drops);
865 __skb_unlink(skb, rcvq); 1001 __skb_unlink(skb, rcvq);
866 __skb_queue_tail(&list_kill, skb); 1002 __skb_queue_tail(&list_kill, skb);
867 } 1003 }
@@ -982,7 +1118,7 @@ try_again:
982 UDP_INC_STATS_USER(sock_net(sk), 1118 UDP_INC_STATS_USER(sock_net(sk),
983 UDP_MIB_INDATAGRAMS, is_udplite); 1119 UDP_MIB_INDATAGRAMS, is_udplite);
984 1120
985 sock_recv_timestamp(msg, sk, skb); 1121 sock_recv_ts_and_drops(msg, sk, skb);
986 1122
987 /* Copy the address. */ 1123 /* Copy the address. */
988 if (sin) { 1124 if (sin) {
@@ -1023,15 +1159,15 @@ int udp_disconnect(struct sock *sk, int flags)
1023 */ 1159 */
1024 1160
1025 sk->sk_state = TCP_CLOSE; 1161 sk->sk_state = TCP_CLOSE;
1026 inet->daddr = 0; 1162 inet->inet_daddr = 0;
1027 inet->dport = 0; 1163 inet->inet_dport = 0;
1028 sk->sk_bound_dev_if = 0; 1164 sk->sk_bound_dev_if = 0;
1029 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) 1165 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
1030 inet_reset_saddr(sk); 1166 inet_reset_saddr(sk);
1031 1167
1032 if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) { 1168 if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) {
1033 sk->sk_prot->unhash(sk); 1169 sk->sk_prot->unhash(sk);
1034 inet->sport = 0; 1170 inet->inet_sport = 0;
1035 } 1171 }
1036 sk_dst_reset(sk); 1172 sk_dst_reset(sk);
1037 return 0; 1173 return 0;
@@ -1042,13 +1178,22 @@ void udp_lib_unhash(struct sock *sk)
1042{ 1178{
1043 if (sk_hashed(sk)) { 1179 if (sk_hashed(sk)) {
1044 struct udp_table *udptable = sk->sk_prot->h.udp_table; 1180 struct udp_table *udptable = sk->sk_prot->h.udp_table;
1045 unsigned int hash = udp_hashfn(sock_net(sk), sk->sk_hash); 1181 struct udp_hslot *hslot, *hslot2;
1046 struct udp_hslot *hslot = &udptable->hash[hash]; 1182
1183 hslot = udp_hashslot(udptable, sock_net(sk),
1184 udp_sk(sk)->udp_port_hash);
1185 hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
1047 1186
1048 spin_lock_bh(&hslot->lock); 1187 spin_lock_bh(&hslot->lock);
1049 if (sk_nulls_del_node_init_rcu(sk)) { 1188 if (sk_nulls_del_node_init_rcu(sk)) {
1050 inet_sk(sk)->num = 0; 1189 hslot->count--;
1190 inet_sk(sk)->inet_num = 0;
1051 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 1191 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
1192
1193 spin_lock(&hslot2->lock);
1194 hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
1195 hslot2->count--;
1196 spin_unlock(&hslot2->lock);
1052 } 1197 }
1053 spin_unlock_bh(&hslot->lock); 1198 spin_unlock_bh(&hslot->lock);
1054 } 1199 }
@@ -1057,25 +1202,22 @@ EXPORT_SYMBOL(udp_lib_unhash);
1057 1202
1058static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) 1203static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1059{ 1204{
1060 int is_udplite = IS_UDPLITE(sk); 1205 int rc = sock_queue_rcv_skb(sk, skb);
1061 int rc; 1206
1207 if (rc < 0) {
1208 int is_udplite = IS_UDPLITE(sk);
1062 1209
1063 if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) {
1064 /* Note that an ENOMEM error is charged twice */ 1210 /* Note that an ENOMEM error is charged twice */
1065 if (rc == -ENOMEM) { 1211 if (rc == -ENOMEM)
1066 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, 1212 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
1067 is_udplite); 1213 is_udplite);
1068 atomic_inc(&sk->sk_drops); 1214 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
1069 } 1215 kfree_skb(skb);
1070 goto drop; 1216 return -1;
1071 } 1217 }
1072 1218
1073 return 0; 1219 return 0;
1074 1220
1075drop:
1076 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
1077 kfree_skb(skb);
1078 return -1;
1079} 1221}
1080 1222
1081/* returns: 1223/* returns:
@@ -1182,53 +1324,88 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1182 1324
1183drop: 1325drop:
1184 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); 1326 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
1327 atomic_inc(&sk->sk_drops);
1185 kfree_skb(skb); 1328 kfree_skb(skb);
1186 return -1; 1329 return -1;
1187} 1330}
1188 1331
1332
1333static void flush_stack(struct sock **stack, unsigned int count,
1334 struct sk_buff *skb, unsigned int final)
1335{
1336 unsigned int i;
1337 struct sk_buff *skb1 = NULL;
1338 struct sock *sk;
1339
1340 for (i = 0; i < count; i++) {
1341 sk = stack[i];
1342 if (likely(skb1 == NULL))
1343 skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC);
1344
1345 if (!skb1) {
1346 atomic_inc(&sk->sk_drops);
1347 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
1348 IS_UDPLITE(sk));
1349 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS,
1350 IS_UDPLITE(sk));
1351 }
1352
1353 if (skb1 && udp_queue_rcv_skb(sk, skb1) <= 0)
1354 skb1 = NULL;
1355 }
1356 if (unlikely(skb1))
1357 kfree_skb(skb1);
1358}
1359
1189/* 1360/*
1190 * Multicasts and broadcasts go to each listener. 1361 * Multicasts and broadcasts go to each listener.
1191 * 1362 *
1192 * Note: called only from the BH handler context, 1363 * Note: called only from the BH handler context.
1193 * so we don't need to lock the hashes.
1194 */ 1364 */
1195static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, 1365static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
1196 struct udphdr *uh, 1366 struct udphdr *uh,
1197 __be32 saddr, __be32 daddr, 1367 __be32 saddr, __be32 daddr,
1198 struct udp_table *udptable) 1368 struct udp_table *udptable)
1199{ 1369{
1200 struct sock *sk; 1370 struct sock *sk, *stack[256 / sizeof(struct sock *)];
1201 struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))]; 1371 struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
1202 int dif; 1372 int dif;
1373 unsigned int i, count = 0;
1203 1374
1204 spin_lock(&hslot->lock); 1375 spin_lock(&hslot->lock);
1205 sk = sk_nulls_head(&hslot->head); 1376 sk = sk_nulls_head(&hslot->head);
1206 dif = skb->dev->ifindex; 1377 dif = skb->dev->ifindex;
1207 sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); 1378 sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
1208 if (sk) { 1379 while (sk) {
1209 struct sock *sknext = NULL; 1380 stack[count++] = sk;
1210 1381 sk = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest,
1211 do { 1382 daddr, uh->source, saddr, dif);
1212 struct sk_buff *skb1 = skb; 1383 if (unlikely(count == ARRAY_SIZE(stack))) {
1213 1384 if (!sk)
1214 sknext = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest, 1385 break;
1215 daddr, uh->source, saddr, 1386 flush_stack(stack, count, skb, ~0);
1216 dif); 1387 count = 0;
1217 if (sknext) 1388 }
1218 skb1 = skb_clone(skb, GFP_ATOMIC); 1389 }
1219 1390 /*
1220 if (skb1) { 1391 * before releasing chain lock, we must take a reference on sockets
1221 int ret = udp_queue_rcv_skb(sk, skb1); 1392 */
1222 if (ret > 0) 1393 for (i = 0; i < count; i++)
1223 /* we should probably re-process instead 1394 sock_hold(stack[i]);
1224 * of dropping packets here. */ 1395
1225 kfree_skb(skb1);
1226 }
1227 sk = sknext;
1228 } while (sknext);
1229 } else
1230 consume_skb(skb);
1231 spin_unlock(&hslot->lock); 1396 spin_unlock(&hslot->lock);
1397
1398 /*
1399 * do the slow work with no lock held
1400 */
1401 if (count) {
1402 flush_stack(stack, count, skb, count - 1);
1403
1404 for (i = 0; i < count; i++)
1405 sock_put(stack[i]);
1406 } else {
1407 kfree_skb(skb);
1408 }
1232 return 0; 1409 return 0;
1233} 1410}
1234 1411
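
flush_stack() batches multicast delivery: sockets are collected and pinned under the slot lock, then cloned-and-delivered with the lock dropped, and the final receiver consumes the original skb so one clone is saved. A userspace model of that copy-saving loop (strdup() plays the role of skb_clone(); error handling omitted):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void deliver(int sk, char *buf)
{
	printf("sk%d received \"%s\"\n", sk, buf);
	free(buf);			/* receiver consumes the buffer */
}

int main(void)
{
	int stack[] = { 1, 2, 3 };	/* pinned under the slot lock */
	unsigned int count = 3, final = count - 1, i;
	char *skb = strdup("payload");

	/* lock now dropped: clone for all but the last receiver, which
	 * takes the original -- one copy saved, as in flush_stack() */
	for (i = 0; i < count; i++)
		deliver(stack[i], i == final ? skb : strdup(skb));
	return 0;
}
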
@@ -1620,9 +1797,14 @@ static struct sock *udp_get_first(struct seq_file *seq, int start)
1620 struct udp_iter_state *state = seq->private; 1797 struct udp_iter_state *state = seq->private;
1621 struct net *net = seq_file_net(seq); 1798 struct net *net = seq_file_net(seq);
1622 1799
1623 for (state->bucket = start; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { 1800 for (state->bucket = start; state->bucket <= state->udp_table->mask;
1801 ++state->bucket) {
1624 struct hlist_nulls_node *node; 1802 struct hlist_nulls_node *node;
1625 struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; 1803 struct udp_hslot *hslot = &state->udp_table->hash[state->bucket];
1804
1805 if (hlist_nulls_empty(&hslot->head))
1806 continue;
1807
1626 spin_lock_bh(&hslot->lock); 1808 spin_lock_bh(&hslot->lock);
1627 sk_nulls_for_each(sk, node, &hslot->head) { 1809 sk_nulls_for_each(sk, node, &hslot->head) {
1628 if (!net_eq(sock_net(sk), net)) 1810 if (!net_eq(sock_net(sk), net))
@@ -1647,7 +1829,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
1647 } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); 1829 } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
1648 1830
1649 if (!sk) { 1831 if (!sk) {
1650 if (state->bucket < UDP_HTABLE_SIZE) 1832 if (state->bucket <= state->udp_table->mask)
1651 spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); 1833 spin_unlock_bh(&state->udp_table->hash[state->bucket].lock);
1652 return udp_get_first(seq, state->bucket + 1); 1834 return udp_get_first(seq, state->bucket + 1);
1653 } 1835 }
@@ -1667,7 +1849,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
1667static void *udp_seq_start(struct seq_file *seq, loff_t *pos) 1849static void *udp_seq_start(struct seq_file *seq, loff_t *pos)
1668{ 1850{
1669 struct udp_iter_state *state = seq->private; 1851 struct udp_iter_state *state = seq->private;
1670 state->bucket = UDP_HTABLE_SIZE; 1852 state->bucket = MAX_UDP_PORTS;
1671 1853
1672 return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; 1854 return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
1673} 1855}
@@ -1689,7 +1871,7 @@ static void udp_seq_stop(struct seq_file *seq, void *v)
1689{ 1871{
1690 struct udp_iter_state *state = seq->private; 1872 struct udp_iter_state *state = seq->private;
1691 1873
1692 if (state->bucket < UDP_HTABLE_SIZE) 1874 if (state->bucket <= state->udp_table->mask)
1693 spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); 1875 spin_unlock_bh(&state->udp_table->hash[state->bucket].lock);
1694} 1876}
1695 1877
@@ -1744,12 +1926,12 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,
1744 int bucket, int *len) 1926 int bucket, int *len)
1745{ 1927{
1746 struct inet_sock *inet = inet_sk(sp); 1928 struct inet_sock *inet = inet_sk(sp);
1747 __be32 dest = inet->daddr; 1929 __be32 dest = inet->inet_daddr;
1748 __be32 src = inet->rcv_saddr; 1930 __be32 src = inet->inet_rcv_saddr;
1749 __u16 destp = ntohs(inet->dport); 1931 __u16 destp = ntohs(inet->inet_dport);
1750 __u16 srcp = ntohs(inet->sport); 1932 __u16 srcp = ntohs(inet->inet_sport);
1751 1933
1752 seq_printf(f, "%4d: %08X:%04X %08X:%04X" 1934 seq_printf(f, "%5d: %08X:%04X %08X:%04X"
1753 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n", 1935 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n",
1754 bucket, src, srcp, dest, destp, sp->sk_state, 1936 bucket, src, srcp, dest, destp, sp->sk_state,
1755 sk_wmem_alloc_get(sp), 1937 sk_wmem_alloc_get(sp),
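udp4_format_sock() picks up the inet_sock field renames from this series (daddr becomes inet_daddr, sport becomes inet_sport, and so on) and widens the bucket column from %4d to %5d, needed once a boot-sized table can exceed 9999 slots. A hedged snippet showing the renamed members in use (show_tuple() is invented for illustration):

	/* Illustrative only: reading a socket's 4-tuple through the
	 * renamed inet_sock members. */
	static void show_tuple(struct seq_file *f, struct sock *sp)
	{
		struct inet_sock *inet = inet_sk(sp);

		seq_printf(f, "%pI4:%u -> %pI4:%u\n",
			   &inet->inet_rcv_saddr, ntohs(inet->inet_sport),
			   &inet->inet_daddr, ntohs(inet->inet_dport));
	}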
@@ -1815,21 +1997,60 @@ void udp4_proc_exit(void)
1815} 1997}
1816#endif /* CONFIG_PROC_FS */ 1998#endif /* CONFIG_PROC_FS */
1817 1999
1818void __init udp_table_init(struct udp_table *table) 2000static __initdata unsigned long uhash_entries;
2001static int __init set_uhash_entries(char *str)
1819{ 2002{
1820 int i; 2003 if (!str)
2004 return 0;
2005 uhash_entries = simple_strtoul(str, &str, 0);
2006 if (uhash_entries && uhash_entries < UDP_HTABLE_SIZE_MIN)
2007 uhash_entries = UDP_HTABLE_SIZE_MIN;
2008 return 1;
2009}
2010__setup("uhash_entries=", set_uhash_entries);
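set_uhash_entries() is a standard __setup() handler: it parses the value once at early boot and clamps anything non-zero below UDP_HTABLE_SIZE_MIN up to the minimum; zero (or omitting the parameter) leaves uhash_entries at 0, which lets udp_table_init() auto-size from available memory. For example, a boot line carrying the following would request a 65536-slot table (an illustrative value; the allocator still rounds to a power of two and applies its own cap):

	uhash_entries=65536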
1821 2011
1822 for (i = 0; i < UDP_HTABLE_SIZE; i++) { 2012void __init udp_table_init(struct udp_table *table, const char *name)
2013{
2014 unsigned int i;
2015
2016 if (!CONFIG_BASE_SMALL)
2017 table->hash = alloc_large_system_hash(name,
2018 2 * sizeof(struct udp_hslot),
2019 uhash_entries,
2020 21, /* one slot per 2 MB */
2021 0,
2022 &table->log,
2023 &table->mask,
2024 64 * 1024);
2025 /*
2026 * Make sure hash table has the minimum size
2027 */
2028 if (CONFIG_BASE_SMALL || table->mask < UDP_HTABLE_SIZE_MIN - 1) {
2029 table->hash = kmalloc(UDP_HTABLE_SIZE_MIN *
2030 2 * sizeof(struct udp_hslot), GFP_KERNEL);
2031 if (!table->hash)
2032 panic(name);
2033 table->log = ilog2(UDP_HTABLE_SIZE_MIN);
2034 table->mask = UDP_HTABLE_SIZE_MIN - 1;
2035 }
2036 table->hash2 = table->hash + (table->mask + 1);
2037 for (i = 0; i <= table->mask; i++) {
1823 INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i); 2038 INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i);
2039 table->hash[i].count = 0;
1824 spin_lock_init(&table->hash[i].lock); 2040 spin_lock_init(&table->hash[i].lock);
1825 } 2041 }
2042 for (i = 0; i <= table->mask; i++) {
2043 INIT_HLIST_NULLS_HEAD(&table->hash2[i].head, i);
2044 table->hash2[i].count = 0;
2045 spin_lock_init(&table->hash2[i].lock);
2046 }
1826} 2047}
1827 2048
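Two details of udp_table_init() are worth spelling out. First, the bucket size passed to alloc_large_system_hash() is 2 * sizeof(struct udp_hslot) and hash2 is set to table->hash + (table->mask + 1): the secondary (local address, local port) table is carved out of the tail half of the same block, so both tables come from one boot-time allocation and share one mask. Second, with the size chosen at boot, UDP_HTABLE_SIZE no longer works as a compile-time bound; lookups reduce a hash with the per-table mask instead. The companion helpers live in include/net/udp.h in this series; quoted from memory, so treat as a sketch:

	static inline u32 udp_hashfn(struct net *net, u32 num, u32 mask)
	{
		/* mix in a per-namespace value so hash positions differ
		 * between network namespaces */
		return (num + net_hash_mix(net)) & mask;
	}

	static inline struct udp_hslot *udp_hashslot(struct udp_table *table,
						     struct net *net,
						     unsigned int num)
	{
		return &table->hash[udp_hashfn(net, num, table->mask)];
	}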
1828void __init udp_init(void) 2049void __init udp_init(void)
1829{ 2050{
1830 unsigned long nr_pages, limit; 2051 unsigned long nr_pages, limit;
1831 2052
1832 udp_table_init(&udp_table); 2053 udp_table_init(&udp_table, "UDP");
1833 /* Set the pressure threshold up by the same strategy of TCP. It is a 2054 /* Set the pressure threshold up by the same strategy of TCP. It is a
1834 * fraction of global memory that is up to 1/2 at 256 MB, decreasing 2055 * fraction of global memory that is up to 1/2 at 256 MB, decreasing
1835 * toward zero with the amount of memory, with a floor of 128 pages. 2056 * toward zero with the amount of memory, with a floor of 128 pages.
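The hunk's context ends inside udp_init(); the rest of the function (untouched by this diff) turns the comment's strategy into numbers for sysctl_udp_mem. The exact kernel expression is not quoted here; the following is an illustrative reconstruction of the described curve (fraction rising linearly to 1/2 at 256 MB, hence a threshold quadratic in memory below that point, with a floor of 128 pages), assuming a hypothetical pressure_pages() helper:

	/* Illustrative math only, not the kernel's exact expression. */
	static unsigned long pressure_pages(unsigned long nr_pages)
	{
		unsigned long cap = 1UL << (28 - PAGE_SHIFT);	/* 256 MB in pages */
		unsigned long limit;

		/* limit = min(mem, cap)/cap * mem/2, i.e. up to half of
		 * memory, shrinking quadratically on small machines */
		limit = min(nr_pages, cap) * (nr_pages / 2) / cap;
		return max(limit, 128UL);	/* floor of 128 pages */
	}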
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 95248d7f75ec..66f79513f4a5 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -12,7 +12,7 @@
12 */ 12 */
13#include "udp_impl.h" 13#include "udp_impl.h"
14 14
15struct udp_table udplite_table; 15struct udp_table udplite_table __read_mostly;
16EXPORT_SYMBOL(udplite_table); 16EXPORT_SYMBOL(udplite_table);
17 17
18static int udplite_rcv(struct sk_buff *skb) 18static int udplite_rcv(struct sk_buff *skb)
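Aside from the .capability removal and the init call below, the one substantive edit in this file is tagging udplite_table __read_mostly: its hash pointer, log, and mask are written once at boot and then only read on every packet, so the linker groups the object with other read-mostly data and keeps it off cache lines that hot writers dirty. On most architectures the annotation is just a section attribute, roughly:

	/* Approximately how the arch headers define it; shown for
	 * illustration only. */
	#define __read_mostly __attribute__((__section__(".data.read_mostly")))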
@@ -64,7 +64,6 @@ static struct inet_protosw udplite4_protosw = {
64 .protocol = IPPROTO_UDPLITE, 64 .protocol = IPPROTO_UDPLITE,
65 .prot = &udplite_prot, 65 .prot = &udplite_prot,
66 .ops = &inet_dgram_ops, 66 .ops = &inet_dgram_ops,
67 .capability = -1,
68 .no_check = 0, /* must checksum (RFC 3828) */ 67 .no_check = 0, /* must checksum (RFC 3828) */
69 .flags = INET_PROTOSW_PERMANENT, 68 .flags = INET_PROTOSW_PERMANENT,
70}; 69};
@@ -110,7 +109,7 @@ static inline int udplite4_proc_init(void)
110 109
111void __init udplite4_register(void) 110void __init udplite4_register(void)
112{ 111{
113 udp_table_init(&udplite_table); 112 udp_table_init(&udplite_table, "UDP-Lite");
114 if (proto_register(&udplite_prot, 1)) 113 if (proto_register(&udplite_prot, 1))
115 goto out_register_err; 114 goto out_register_err;
116 115
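Giving each table a name ("UDP" earlier, "UDP-Lite" here) is not cosmetic: alloc_large_system_hash() logs the geometry it settled on during boot, and with two dynamically sized tables the name is the only way to tell the lines apart. Illustrative dmesg output (numbers invented; order and byte count depend on memory and uhash_entries):

	UDP hash table entries: 32768 (order: 8, 1048576 bytes)
	UDP-Lite hash table entries: 32768 (order: 8, 1048576 bytes)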