aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/Kconfig1
-rw-r--r--net/ipv4/ah4.c2
-rw-r--r--net/ipv4/arp.c9
-rw-r--r--net/ipv4/devinet.c37
-rw-r--r--net/ipv4/esp4.c554
-rw-r--r--net/ipv4/fib_frontend.c14
-rw-r--r--net/ipv4/fib_hash.c47
-rw-r--r--net/ipv4/fib_semantics.c116
-rw-r--r--net/ipv4/fib_trie.c104
-rw-r--r--net/ipv4/inet_connection_sock.c8
-rw-r--r--net/ipv4/inet_diag.c15
-rw-r--r--net/ipv4/inet_hashtables.c69
-rw-r--r--net/ipv4/ip_output.c7
-rw-r--r--net/ipv4/ipcomp.c7
-rw-r--r--net/ipv4/netfilter/arp_tables.c102
-rw-r--r--net/ipv4/netfilter/arptable_filter.c31
-rw-r--r--net/ipv4/netfilter/ip_queue.c18
-rw-r--r--net/ipv4/netfilter/ip_tables.c112
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c7
-rw-r--r--net/ipv4/netfilter/ipt_recent.c6
-rw-r--r--net/ipv4/netfilter/iptable_filter.c33
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c33
-rw-r--r--net/ipv4/netfilter/iptable_raw.c33
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c14
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c40
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c22
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c42
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c5
-rw-r--r--net/ipv4/netfilter/nf_nat_helper.c3
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c10
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_gre.c16
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_icmp.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_tcp.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_udp.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_rule.c16
-rw-r--r--net/ipv4/netfilter/nf_nat_sip.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_tftp.c2
-rw-r--r--net/ipv4/raw.c42
-rw-r--r--net/ipv4/route.c211
-rw-r--r--net/ipv4/sysctl_net_ipv4.c2
-rw-r--r--net/ipv4/tcp_input.c2
-rw-r--r--net/ipv4/tcp_ipv4.c15
-rw-r--r--net/ipv4/tcp_output.c1
-rw-r--r--net/ipv4/udp.c25
-rw-r--r--net/ipv4/xfrm4_policy.c1
-rw-r--r--net/ipv4/xfrm4_tunnel.c4
47 files changed, 1102 insertions, 748 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 24e2b7294bf8..19880b086e71 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -343,6 +343,7 @@ config INET_ESP
343 tristate "IP: ESP transformation" 343 tristate "IP: ESP transformation"
344 select XFRM 344 select XFRM
345 select CRYPTO 345 select CRYPTO
346 select CRYPTO_AEAD
346 select CRYPTO_HMAC 347 select CRYPTO_HMAC
347 select CRYPTO_MD5 348 select CRYPTO_MD5
348 select CRYPTO_CBC 349 select CRYPTO_CBC
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index d76803a3dcae..9d4555ec0b59 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -300,7 +300,7 @@ static void ah_destroy(struct xfrm_state *x)
300} 300}
301 301
302 302
303static struct xfrm_type ah_type = 303static const struct xfrm_type ah_type =
304{ 304{
305 .description = "AH4", 305 .description = "AH4",
306 .owner = THIS_MODULE, 306 .owner = THIS_MODULE,
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 5976c598cc4b..8e17f65f4002 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -558,8 +558,9 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt)
558 */ 558 */
559struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, 559struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
560 struct net_device *dev, __be32 src_ip, 560 struct net_device *dev, __be32 src_ip,
561 unsigned char *dest_hw, unsigned char *src_hw, 561 const unsigned char *dest_hw,
562 unsigned char *target_hw) 562 const unsigned char *src_hw,
563 const unsigned char *target_hw)
563{ 564{
564 struct sk_buff *skb; 565 struct sk_buff *skb;
565 struct arphdr *arp; 566 struct arphdr *arp;
@@ -672,8 +673,8 @@ void arp_xmit(struct sk_buff *skb)
672 */ 673 */
673void arp_send(int type, int ptype, __be32 dest_ip, 674void arp_send(int type, int ptype, __be32 dest_ip,
674 struct net_device *dev, __be32 src_ip, 675 struct net_device *dev, __be32 src_ip,
675 unsigned char *dest_hw, unsigned char *src_hw, 676 const unsigned char *dest_hw, const unsigned char *src_hw,
676 unsigned char *target_hw) 677 const unsigned char *target_hw)
677{ 678{
678 struct sk_buff *skb; 679 struct sk_buff *skb;
679 680
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 21f71bf912d5..f282b26f63eb 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -64,7 +64,7 @@
64#include <net/rtnetlink.h> 64#include <net/rtnetlink.h>
65#include <net/net_namespace.h> 65#include <net/net_namespace.h>
66 66
67struct ipv4_devconf ipv4_devconf = { 67static struct ipv4_devconf ipv4_devconf = {
68 .data = { 68 .data = {
69 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1, 69 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
70 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1, 70 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
@@ -485,46 +485,41 @@ errout:
485 return err; 485 return err;
486} 486}
487 487
488static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh) 488static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
489{ 489{
490 struct nlattr *tb[IFA_MAX+1]; 490 struct nlattr *tb[IFA_MAX+1];
491 struct in_ifaddr *ifa; 491 struct in_ifaddr *ifa;
492 struct ifaddrmsg *ifm; 492 struct ifaddrmsg *ifm;
493 struct net_device *dev; 493 struct net_device *dev;
494 struct in_device *in_dev; 494 struct in_device *in_dev;
495 int err = -EINVAL; 495 int err;
496 496
497 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); 497 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
498 if (err < 0) 498 if (err < 0)
499 goto errout; 499 goto errout;
500 500
501 ifm = nlmsg_data(nlh); 501 ifm = nlmsg_data(nlh);
502 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) { 502 err = -EINVAL;
503 err = -EINVAL; 503 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
504 goto errout; 504 goto errout;
505 }
506 505
507 dev = __dev_get_by_index(&init_net, ifm->ifa_index); 506 dev = __dev_get_by_index(net, ifm->ifa_index);
508 if (dev == NULL) { 507 err = -ENODEV;
509 err = -ENODEV; 508 if (dev == NULL)
510 goto errout; 509 goto errout;
511 }
512 510
513 in_dev = __in_dev_get_rtnl(dev); 511 in_dev = __in_dev_get_rtnl(dev);
514 if (in_dev == NULL) { 512 err = -ENOBUFS;
515 err = -ENOBUFS; 513 if (in_dev == NULL)
516 goto errout; 514 goto errout;
517 }
518 515
519 ifa = inet_alloc_ifa(); 516 ifa = inet_alloc_ifa();
520 if (ifa == NULL) { 517 if (ifa == NULL)
521 /* 518 /*
522 * A potential indev allocation can be left alive, it stays 519 * A potential indev allocation can be left alive, it stays
523 * assigned to its device and is destroy with it. 520 * assigned to its device and is destroy with it.
524 */ 521 */
525 err = -ENOBUFS;
526 goto errout; 522 goto errout;
527 }
528 523
529 ipv4_devconf_setall(in_dev); 524 ipv4_devconf_setall(in_dev);
530 in_dev_hold(in_dev); 525 in_dev_hold(in_dev);
@@ -568,7 +563,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
568 if (net != &init_net) 563 if (net != &init_net)
569 return -EINVAL; 564 return -EINVAL;
570 565
571 ifa = rtm_to_ifaddr(nlh); 566 ifa = rtm_to_ifaddr(net, nlh);
572 if (IS_ERR(ifa)) 567 if (IS_ERR(ifa))
573 return PTR_ERR(ifa); 568 return PTR_ERR(ifa);
574 569
@@ -1182,7 +1177,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1182 1177
1183 s_ip_idx = ip_idx = cb->args[1]; 1178 s_ip_idx = ip_idx = cb->args[1];
1184 idx = 0; 1179 idx = 0;
1185 for_each_netdev(&init_net, dev) { 1180 for_each_netdev(net, dev) {
1186 if (idx < s_idx) 1181 if (idx < s_idx)
1187 goto cont; 1182 goto cont;
1188 if (idx > s_idx) 1183 if (idx > s_idx)
@@ -1216,7 +1211,9 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1216 struct sk_buff *skb; 1211 struct sk_buff *skb;
1217 u32 seq = nlh ? nlh->nlmsg_seq : 0; 1212 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1218 int err = -ENOBUFS; 1213 int err = -ENOBUFS;
1214 struct net *net;
1219 1215
1216 net = ifa->ifa_dev->dev->nd_net;
1220 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL); 1217 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1221 if (skb == NULL) 1218 if (skb == NULL)
1222 goto errout; 1219 goto errout;
@@ -1228,10 +1225,10 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1228 kfree_skb(skb); 1225 kfree_skb(skb);
1229 goto errout; 1226 goto errout;
1230 } 1227 }
1231 err = rtnl_notify(skb, &init_net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); 1228 err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1232errout: 1229errout:
1233 if (err < 0) 1230 if (err < 0)
1234 rtnl_set_sk_err(&init_net, RTNLGRP_IPV4_IFADDR, err); 1231 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1235} 1232}
1236 1233
1237#ifdef CONFIG_SYSCTL 1234#ifdef CONFIG_SYSCTL
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 28ea5c77ca23..258d17631b4b 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -1,27 +1,118 @@
1#include <crypto/aead.h>
2#include <crypto/authenc.h>
1#include <linux/err.h> 3#include <linux/err.h>
2#include <linux/module.h> 4#include <linux/module.h>
3#include <net/ip.h> 5#include <net/ip.h>
4#include <net/xfrm.h> 6#include <net/xfrm.h>
5#include <net/esp.h> 7#include <net/esp.h>
6#include <linux/scatterlist.h> 8#include <linux/scatterlist.h>
7#include <linux/crypto.h>
8#include <linux/kernel.h> 9#include <linux/kernel.h>
9#include <linux/pfkeyv2.h> 10#include <linux/pfkeyv2.h>
10#include <linux/random.h> 11#include <linux/rtnetlink.h>
12#include <linux/slab.h>
11#include <linux/spinlock.h> 13#include <linux/spinlock.h>
12#include <linux/in6.h> 14#include <linux/in6.h>
13#include <net/icmp.h> 15#include <net/icmp.h>
14#include <net/protocol.h> 16#include <net/protocol.h>
15#include <net/udp.h> 17#include <net/udp.h>
16 18
19struct esp_skb_cb {
20 struct xfrm_skb_cb xfrm;
21 void *tmp;
22};
23
24#define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0]))
25
26/*
27 * Allocate an AEAD request structure with extra space for SG and IV.
28 *
29 * For alignment considerations the IV is placed at the front, followed
30 * by the request and finally the SG list.
31 *
32 * TODO: Use spare space in skb for this where possible.
33 */
34static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags)
35{
36 unsigned int len;
37
38 len = crypto_aead_ivsize(aead);
39 if (len) {
40 len += crypto_aead_alignmask(aead) &
41 ~(crypto_tfm_ctx_alignment() - 1);
42 len = ALIGN(len, crypto_tfm_ctx_alignment());
43 }
44
45 len += sizeof(struct aead_givcrypt_request) + crypto_aead_reqsize(aead);
46 len = ALIGN(len, __alignof__(struct scatterlist));
47
48 len += sizeof(struct scatterlist) * nfrags;
49
50 return kmalloc(len, GFP_ATOMIC);
51}
52
53static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp)
54{
55 return crypto_aead_ivsize(aead) ?
56 PTR_ALIGN((u8 *)tmp, crypto_aead_alignmask(aead) + 1) : tmp;
57}
58
59static inline struct aead_givcrypt_request *esp_tmp_givreq(
60 struct crypto_aead *aead, u8 *iv)
61{
62 struct aead_givcrypt_request *req;
63
64 req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead),
65 crypto_tfm_ctx_alignment());
66 aead_givcrypt_set_tfm(req, aead);
67 return req;
68}
69
70static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv)
71{
72 struct aead_request *req;
73
74 req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead),
75 crypto_tfm_ctx_alignment());
76 aead_request_set_tfm(req, aead);
77 return req;
78}
79
80static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
81 struct aead_request *req)
82{
83 return (void *)ALIGN((unsigned long)(req + 1) +
84 crypto_aead_reqsize(aead),
85 __alignof__(struct scatterlist));
86}
87
88static inline struct scatterlist *esp_givreq_sg(
89 struct crypto_aead *aead, struct aead_givcrypt_request *req)
90{
91 return (void *)ALIGN((unsigned long)(req + 1) +
92 crypto_aead_reqsize(aead),
93 __alignof__(struct scatterlist));
94}
95
96static void esp_output_done(struct crypto_async_request *base, int err)
97{
98 struct sk_buff *skb = base->data;
99
100 kfree(ESP_SKB_CB(skb)->tmp);
101 xfrm_output_resume(skb, err);
102}
103
17static int esp_output(struct xfrm_state *x, struct sk_buff *skb) 104static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
18{ 105{
19 int err; 106 int err;
20 struct ip_esp_hdr *esph; 107 struct ip_esp_hdr *esph;
21 struct crypto_blkcipher *tfm; 108 struct crypto_aead *aead;
22 struct blkcipher_desc desc; 109 struct aead_givcrypt_request *req;
110 struct scatterlist *sg;
111 struct scatterlist *asg;
23 struct esp_data *esp; 112 struct esp_data *esp;
24 struct sk_buff *trailer; 113 struct sk_buff *trailer;
114 void *tmp;
115 u8 *iv;
25 u8 *tail; 116 u8 *tail;
26 int blksize; 117 int blksize;
27 int clen; 118 int clen;
@@ -36,18 +127,27 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
36 clen = skb->len; 127 clen = skb->len;
37 128
38 esp = x->data; 129 esp = x->data;
39 alen = esp->auth.icv_trunc_len; 130 aead = esp->aead;
40 tfm = esp->conf.tfm; 131 alen = crypto_aead_authsize(aead);
41 desc.tfm = tfm; 132
42 desc.flags = 0; 133 blksize = ALIGN(crypto_aead_blocksize(aead), 4);
43 blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4);
44 clen = ALIGN(clen + 2, blksize); 134 clen = ALIGN(clen + 2, blksize);
45 if (esp->conf.padlen) 135 if (esp->padlen)
46 clen = ALIGN(clen, esp->conf.padlen); 136 clen = ALIGN(clen, esp->padlen);
137
138 if ((err = skb_cow_data(skb, clen - skb->len + alen, &trailer)) < 0)
139 goto error;
140 nfrags = err;
47 141
48 if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0) 142 tmp = esp_alloc_tmp(aead, nfrags + 1);
143 if (!tmp)
49 goto error; 144 goto error;
50 145
146 iv = esp_tmp_iv(aead, tmp);
147 req = esp_tmp_givreq(aead, iv);
148 asg = esp_givreq_sg(aead, req);
149 sg = asg + 1;
150
51 /* Fill padding... */ 151 /* Fill padding... */
52 tail = skb_tail_pointer(trailer); 152 tail = skb_tail_pointer(trailer);
53 do { 153 do {
@@ -56,28 +156,34 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
56 tail[i] = i + 1; 156 tail[i] = i + 1;
57 } while (0); 157 } while (0);
58 tail[clen - skb->len - 2] = (clen - skb->len) - 2; 158 tail[clen - skb->len - 2] = (clen - skb->len) - 2;
59 pskb_put(skb, trailer, clen - skb->len); 159 tail[clen - skb->len - 1] = *skb_mac_header(skb);
160 pskb_put(skb, trailer, clen - skb->len + alen);
60 161
61 skb_push(skb, -skb_network_offset(skb)); 162 skb_push(skb, -skb_network_offset(skb));
62 esph = ip_esp_hdr(skb); 163 esph = ip_esp_hdr(skb);
63 *(skb_tail_pointer(trailer) - 1) = *skb_mac_header(skb);
64 *skb_mac_header(skb) = IPPROTO_ESP; 164 *skb_mac_header(skb) = IPPROTO_ESP;
65 165
66 spin_lock_bh(&x->lock);
67
68 /* this is non-NULL only with UDP Encapsulation */ 166 /* this is non-NULL only with UDP Encapsulation */
69 if (x->encap) { 167 if (x->encap) {
70 struct xfrm_encap_tmpl *encap = x->encap; 168 struct xfrm_encap_tmpl *encap = x->encap;
71 struct udphdr *uh; 169 struct udphdr *uh;
72 __be32 *udpdata32; 170 __be32 *udpdata32;
171 unsigned int sport, dport;
172 int encap_type;
173
174 spin_lock_bh(&x->lock);
175 sport = encap->encap_sport;
176 dport = encap->encap_dport;
177 encap_type = encap->encap_type;
178 spin_unlock_bh(&x->lock);
73 179
74 uh = (struct udphdr *)esph; 180 uh = (struct udphdr *)esph;
75 uh->source = encap->encap_sport; 181 uh->source = sport;
76 uh->dest = encap->encap_dport; 182 uh->dest = dport;
77 uh->len = htons(skb->len + alen - skb_transport_offset(skb)); 183 uh->len = htons(skb->len - skb_transport_offset(skb));
78 uh->check = 0; 184 uh->check = 0;
79 185
80 switch (encap->encap_type) { 186 switch (encap_type) {
81 default: 187 default:
82 case UDP_ENCAP_ESPINUDP: 188 case UDP_ENCAP_ESPINUDP:
83 esph = (struct ip_esp_hdr *)(uh + 1); 189 esph = (struct ip_esp_hdr *)(uh + 1);
@@ -95,131 +201,45 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
95 esph->spi = x->id.spi; 201 esph->spi = x->id.spi;
96 esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq); 202 esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq);
97 203
98 if (esp->conf.ivlen) { 204 sg_init_table(sg, nfrags);
99 if (unlikely(!esp->conf.ivinitted)) { 205 skb_to_sgvec(skb, sg,
100 get_random_bytes(esp->conf.ivec, esp->conf.ivlen); 206 esph->enc_data + crypto_aead_ivsize(aead) - skb->data,
101 esp->conf.ivinitted = 1; 207 clen + alen);
102 } 208 sg_init_one(asg, esph, sizeof(*esph));
103 crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen); 209
104 } 210 aead_givcrypt_set_callback(req, 0, esp_output_done, skb);
105 211 aead_givcrypt_set_crypt(req, sg, sg, clen, iv);
106 do { 212 aead_givcrypt_set_assoc(req, asg, sizeof(*esph));
107 struct scatterlist *sg = &esp->sgbuf[0]; 213 aead_givcrypt_set_giv(req, esph->enc_data, XFRM_SKB_CB(skb)->seq);
108 214
109 if (unlikely(nfrags > ESP_NUM_FAST_SG)) { 215 ESP_SKB_CB(skb)->tmp = tmp;
110 sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC); 216 err = crypto_aead_givencrypt(req);
111 if (!sg) 217 if (err == -EINPROGRESS)
112 goto unlock; 218 goto error;
113 }
114 sg_init_table(sg, nfrags);
115 skb_to_sgvec(skb, sg,
116 esph->enc_data +
117 esp->conf.ivlen -
118 skb->data, clen);
119 err = crypto_blkcipher_encrypt(&desc, sg, sg, clen);
120 if (unlikely(sg != &esp->sgbuf[0]))
121 kfree(sg);
122 } while (0);
123
124 if (unlikely(err))
125 goto unlock;
126
127 if (esp->conf.ivlen) {
128 memcpy(esph->enc_data, esp->conf.ivec, esp->conf.ivlen);
129 crypto_blkcipher_get_iv(tfm, esp->conf.ivec, esp->conf.ivlen);
130 }
131 219
132 if (esp->auth.icv_full_len) { 220 if (err == -EBUSY)
133 err = esp_mac_digest(esp, skb, (u8 *)esph - skb->data, 221 err = NET_XMIT_DROP;
134 sizeof(*esph) + esp->conf.ivlen + clen);
135 memcpy(pskb_put(skb, trailer, alen), esp->auth.work_icv, alen);
136 }
137 222
138unlock: 223 kfree(tmp);
139 spin_unlock_bh(&x->lock);
140 224
141error: 225error:
142 return err; 226 return err;
143} 227}
144 228
145/* 229static int esp_input_done2(struct sk_buff *skb, int err)
146 * Note: detecting truncated vs. non-truncated authentication data is very
147 * expensive, so we only support truncated data, which is the recommended
148 * and common case.
149 */
150static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
151{ 230{
152 struct iphdr *iph; 231 struct iphdr *iph;
153 struct ip_esp_hdr *esph; 232 struct xfrm_state *x = xfrm_input_state(skb);
154 struct esp_data *esp = x->data; 233 struct esp_data *esp = x->data;
155 struct crypto_blkcipher *tfm = esp->conf.tfm; 234 struct crypto_aead *aead = esp->aead;
156 struct blkcipher_desc desc = { .tfm = tfm }; 235 int alen = crypto_aead_authsize(aead);
157 struct sk_buff *trailer; 236 int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
158 int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4); 237 int elen = skb->len - hlen;
159 int alen = esp->auth.icv_trunc_len;
160 int elen = skb->len - sizeof(*esph) - esp->conf.ivlen - alen;
161 int nfrags;
162 int ihl; 238 int ihl;
163 u8 nexthdr[2]; 239 u8 nexthdr[2];
164 struct scatterlist *sg;
165 int padlen; 240 int padlen;
166 int err = -EINVAL;
167
168 if (!pskb_may_pull(skb, sizeof(*esph)))
169 goto out;
170
171 if (elen <= 0 || (elen & (blksize-1)))
172 goto out;
173
174 if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
175 goto out;
176 nfrags = err;
177
178 skb->ip_summed = CHECKSUM_NONE;
179
180 spin_lock(&x->lock);
181
182 /* If integrity check is required, do this. */
183 if (esp->auth.icv_full_len) {
184 u8 sum[alen];
185 241
186 err = esp_mac_digest(esp, skb, 0, skb->len - alen); 242 kfree(ESP_SKB_CB(skb)->tmp);
187 if (err)
188 goto unlock;
189
190 if (skb_copy_bits(skb, skb->len - alen, sum, alen))
191 BUG();
192
193 if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) {
194 err = -EBADMSG;
195 goto unlock;
196 }
197 }
198
199 esph = (struct ip_esp_hdr *)skb->data;
200
201 /* Get ivec. This can be wrong, check against another impls. */
202 if (esp->conf.ivlen)
203 crypto_blkcipher_set_iv(tfm, esph->enc_data, esp->conf.ivlen);
204
205 sg = &esp->sgbuf[0];
206
207 if (unlikely(nfrags > ESP_NUM_FAST_SG)) {
208 err = -ENOMEM;
209 sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
210 if (!sg)
211 goto unlock;
212 }
213 sg_init_table(sg, nfrags);
214 skb_to_sgvec(skb, sg,
215 sizeof(*esph) + esp->conf.ivlen,
216 elen);
217 err = crypto_blkcipher_decrypt(&desc, sg, sg, elen);
218 if (unlikely(sg != &esp->sgbuf[0]))
219 kfree(sg);
220
221unlock:
222 spin_unlock(&x->lock);
223 243
224 if (unlikely(err)) 244 if (unlikely(err))
225 goto out; 245 goto out;
@@ -229,15 +249,11 @@ unlock:
229 249
230 err = -EINVAL; 250 err = -EINVAL;
231 padlen = nexthdr[0]; 251 padlen = nexthdr[0];
232 if (padlen+2 >= elen) 252 if (padlen + 2 + alen >= elen)
233 goto out; 253 goto out;
234 254
235 /* ... check padding bits here. Silly. :-) */ 255 /* ... check padding bits here. Silly. :-) */
236 256
237 /* RFC4303: Drop dummy packets without any error */
238 if (nexthdr[1] == IPPROTO_NONE)
239 goto out;
240
241 iph = ip_hdr(skb); 257 iph = ip_hdr(skb);
242 ihl = iph->ihl * 4; 258 ihl = iph->ihl * 4;
243 259
@@ -279,10 +295,87 @@ unlock:
279 } 295 }
280 296
281 pskb_trim(skb, skb->len - alen - padlen - 2); 297 pskb_trim(skb, skb->len - alen - padlen - 2);
282 __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen); 298 __skb_pull(skb, hlen);
283 skb_set_transport_header(skb, -ihl); 299 skb_set_transport_header(skb, -ihl);
284 300
285 return nexthdr[1]; 301 err = nexthdr[1];
302
303 /* RFC4303: Drop dummy packets without any error */
304 if (err == IPPROTO_NONE)
305 err = -EINVAL;
306
307out:
308 return err;
309}
310
311static void esp_input_done(struct crypto_async_request *base, int err)
312{
313 struct sk_buff *skb = base->data;
314
315 xfrm_input_resume(skb, esp_input_done2(skb, err));
316}
317
318/*
319 * Note: detecting truncated vs. non-truncated authentication data is very
320 * expensive, so we only support truncated data, which is the recommended
321 * and common case.
322 */
323static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
324{
325 struct ip_esp_hdr *esph;
326 struct esp_data *esp = x->data;
327 struct crypto_aead *aead = esp->aead;
328 struct aead_request *req;
329 struct sk_buff *trailer;
330 int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead);
331 int nfrags;
332 void *tmp;
333 u8 *iv;
334 struct scatterlist *sg;
335 struct scatterlist *asg;
336 int err = -EINVAL;
337
338 if (!pskb_may_pull(skb, sizeof(*esph)))
339 goto out;
340
341 if (elen <= 0)
342 goto out;
343
344 if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
345 goto out;
346 nfrags = err;
347
348 err = -ENOMEM;
349 tmp = esp_alloc_tmp(aead, nfrags + 1);
350 if (!tmp)
351 goto out;
352
353 ESP_SKB_CB(skb)->tmp = tmp;
354 iv = esp_tmp_iv(aead, tmp);
355 req = esp_tmp_req(aead, iv);
356 asg = esp_req_sg(aead, req);
357 sg = asg + 1;
358
359 skb->ip_summed = CHECKSUM_NONE;
360
361 esph = (struct ip_esp_hdr *)skb->data;
362
363 /* Get ivec. This can be wrong, check against another impls. */
364 iv = esph->enc_data;
365
366 sg_init_table(sg, nfrags);
367 skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen);
368 sg_init_one(asg, esph, sizeof(*esph));
369
370 aead_request_set_callback(req, 0, esp_input_done, skb);
371 aead_request_set_crypt(req, sg, sg, elen, iv);
372 aead_request_set_assoc(req, asg, sizeof(*esph));
373
374 err = crypto_aead_decrypt(req);
375 if (err == -EINPROGRESS)
376 goto out;
377
378 err = esp_input_done2(skb, err);
286 379
287out: 380out:
288 return err; 381 return err;
@@ -291,11 +384,11 @@ out:
291static u32 esp4_get_mtu(struct xfrm_state *x, int mtu) 384static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
292{ 385{
293 struct esp_data *esp = x->data; 386 struct esp_data *esp = x->data;
294 u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); 387 u32 blksize = ALIGN(crypto_aead_blocksize(esp->aead), 4);
295 u32 align = max_t(u32, blksize, esp->conf.padlen); 388 u32 align = max_t(u32, blksize, esp->padlen);
296 u32 rem; 389 u32 rem;
297 390
298 mtu -= x->props.header_len + esp->auth.icv_trunc_len; 391 mtu -= x->props.header_len + crypto_aead_authsize(esp->aead);
299 rem = mtu & (align - 1); 392 rem = mtu & (align - 1);
300 mtu &= ~(align - 1); 393 mtu &= ~(align - 1);
301 394
@@ -342,80 +435,143 @@ static void esp_destroy(struct xfrm_state *x)
342 if (!esp) 435 if (!esp)
343 return; 436 return;
344 437
345 crypto_free_blkcipher(esp->conf.tfm); 438 crypto_free_aead(esp->aead);
346 esp->conf.tfm = NULL;
347 kfree(esp->conf.ivec);
348 esp->conf.ivec = NULL;
349 crypto_free_hash(esp->auth.tfm);
350 esp->auth.tfm = NULL;
351 kfree(esp->auth.work_icv);
352 esp->auth.work_icv = NULL;
353 kfree(esp); 439 kfree(esp);
354} 440}
355 441
356static int esp_init_state(struct xfrm_state *x) 442static int esp_init_aead(struct xfrm_state *x)
357{ 443{
358 struct esp_data *esp = NULL; 444 struct esp_data *esp = x->data;
359 struct crypto_blkcipher *tfm; 445 struct crypto_aead *aead;
360 u32 align; 446 int err;
447
448 aead = crypto_alloc_aead(x->aead->alg_name, 0, 0);
449 err = PTR_ERR(aead);
450 if (IS_ERR(aead))
451 goto error;
452
453 esp->aead = aead;
454
455 err = crypto_aead_setkey(aead, x->aead->alg_key,
456 (x->aead->alg_key_len + 7) / 8);
457 if (err)
458 goto error;
459
460 err = crypto_aead_setauthsize(aead, x->aead->alg_icv_len / 8);
461 if (err)
462 goto error;
463
464error:
465 return err;
466}
361 467
468static int esp_init_authenc(struct xfrm_state *x)
469{
470 struct esp_data *esp = x->data;
471 struct crypto_aead *aead;
472 struct crypto_authenc_key_param *param;
473 struct rtattr *rta;
474 char *key;
475 char *p;
476 char authenc_name[CRYPTO_MAX_ALG_NAME];
477 unsigned int keylen;
478 int err;
479
480 err = -EINVAL;
362 if (x->ealg == NULL) 481 if (x->ealg == NULL)
363 goto error; 482 goto error;
364 483
365 esp = kzalloc(sizeof(*esp), GFP_KERNEL); 484 err = -ENAMETOOLONG;
366 if (esp == NULL) 485 if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, "authenc(%s,%s)",
367 return -ENOMEM; 486 x->aalg ? x->aalg->alg_name : "digest_null",
487 x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
488 goto error;
489
490 aead = crypto_alloc_aead(authenc_name, 0, 0);
491 err = PTR_ERR(aead);
492 if (IS_ERR(aead))
493 goto error;
494
495 esp->aead = aead;
496
497 keylen = (x->aalg ? (x->aalg->alg_key_len + 7) / 8 : 0) +
498 (x->ealg->alg_key_len + 7) / 8 + RTA_SPACE(sizeof(*param));
499 err = -ENOMEM;
500 key = kmalloc(keylen, GFP_KERNEL);
501 if (!key)
502 goto error;
503
504 p = key;
505 rta = (void *)p;
506 rta->rta_type = CRYPTO_AUTHENC_KEYA_PARAM;
507 rta->rta_len = RTA_LENGTH(sizeof(*param));
508 param = RTA_DATA(rta);
509 p += RTA_SPACE(sizeof(*param));
368 510
369 if (x->aalg) { 511 if (x->aalg) {
370 struct xfrm_algo_desc *aalg_desc; 512 struct xfrm_algo_desc *aalg_desc;
371 struct crypto_hash *hash;
372 513
373 hash = crypto_alloc_hash(x->aalg->alg_name, 0, 514 memcpy(p, x->aalg->alg_key, (x->aalg->alg_key_len + 7) / 8);
374 CRYPTO_ALG_ASYNC); 515 p += (x->aalg->alg_key_len + 7) / 8;
375 if (IS_ERR(hash))
376 goto error;
377
378 esp->auth.tfm = hash;
379 if (crypto_hash_setkey(hash, x->aalg->alg_key,
380 (x->aalg->alg_key_len + 7) / 8))
381 goto error;
382 516
383 aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0); 517 aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
384 BUG_ON(!aalg_desc); 518 BUG_ON(!aalg_desc);
385 519
520 err = -EINVAL;
386 if (aalg_desc->uinfo.auth.icv_fullbits/8 != 521 if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
387 crypto_hash_digestsize(hash)) { 522 crypto_aead_authsize(aead)) {
388 NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n", 523 NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n",
389 x->aalg->alg_name, 524 x->aalg->alg_name,
390 crypto_hash_digestsize(hash), 525 crypto_aead_authsize(aead),
391 aalg_desc->uinfo.auth.icv_fullbits/8); 526 aalg_desc->uinfo.auth.icv_fullbits/8);
392 goto error; 527 goto free_key;
393 } 528 }
394 529
395 esp->auth.icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8; 530 err = crypto_aead_setauthsize(
396 esp->auth.icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8; 531 aead, aalg_desc->uinfo.auth.icv_truncbits / 8);
397 532 if (err)
398 esp->auth.work_icv = kmalloc(esp->auth.icv_full_len, GFP_KERNEL); 533 goto free_key;
399 if (!esp->auth.work_icv)
400 goto error;
401 } 534 }
402 535
403 tfm = crypto_alloc_blkcipher(x->ealg->alg_name, 0, CRYPTO_ALG_ASYNC); 536 param->enckeylen = cpu_to_be32((x->ealg->alg_key_len + 7) / 8);
404 if (IS_ERR(tfm)) 537 memcpy(p, x->ealg->alg_key, (x->ealg->alg_key_len + 7) / 8);
405 goto error; 538
406 esp->conf.tfm = tfm; 539 err = crypto_aead_setkey(aead, key, keylen);
407 esp->conf.ivlen = crypto_blkcipher_ivsize(tfm); 540
408 esp->conf.padlen = 0; 541free_key:
409 if (esp->conf.ivlen) { 542 kfree(key);
410 esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL); 543
411 if (unlikely(esp->conf.ivec == NULL)) 544error:
412 goto error; 545 return err;
413 esp->conf.ivinitted = 0; 546}
414 } 547
415 if (crypto_blkcipher_setkey(tfm, x->ealg->alg_key, 548static int esp_init_state(struct xfrm_state *x)
416 (x->ealg->alg_key_len + 7) / 8)) 549{
550 struct esp_data *esp;
551 struct crypto_aead *aead;
552 u32 align;
553 int err;
554
555 esp = kzalloc(sizeof(*esp), GFP_KERNEL);
556 if (esp == NULL)
557 return -ENOMEM;
558
559 x->data = esp;
560
561 if (x->aead)
562 err = esp_init_aead(x);
563 else
564 err = esp_init_authenc(x);
565
566 if (err)
417 goto error; 567 goto error;
418 x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen; 568
569 aead = esp->aead;
570
571 esp->padlen = 0;
572
573 x->props.header_len = sizeof(struct ip_esp_hdr) +
574 crypto_aead_ivsize(aead);
419 if (x->props.mode == XFRM_MODE_TUNNEL) 575 if (x->props.mode == XFRM_MODE_TUNNEL)
420 x->props.header_len += sizeof(struct iphdr); 576 x->props.header_len += sizeof(struct iphdr);
421 else if (x->props.mode == XFRM_MODE_BEET) 577 else if (x->props.mode == XFRM_MODE_BEET)
@@ -434,21 +590,17 @@ static int esp_init_state(struct xfrm_state *x)
434 break; 590 break;
435 } 591 }
436 } 592 }
437 x->data = esp; 593
438 align = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); 594 align = ALIGN(crypto_aead_blocksize(aead), 4);
439 if (esp->conf.padlen) 595 if (esp->padlen)
440 align = max_t(u32, align, esp->conf.padlen); 596 align = max_t(u32, align, esp->padlen);
441 x->props.trailer_len = align + 1 + esp->auth.icv_trunc_len; 597 x->props.trailer_len = align + 1 + crypto_aead_authsize(esp->aead);
442 return 0;
443 598
444error: 599error:
445 x->data = esp; 600 return err;
446 esp_destroy(x);
447 x->data = NULL;
448 return -EINVAL;
449} 601}
450 602
451static struct xfrm_type esp_type = 603static const struct xfrm_type esp_type =
452{ 604{
453 .description = "ESP4", 605 .description = "ESP4",
454 .owner = THIS_MODULE, 606 .owner = THIS_MODULE,
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index d28261826bc2..86ff2711fc95 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -808,7 +808,7 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
808 First of all, we scan fib_info list searching 808 First of all, we scan fib_info list searching
809 for stray nexthop entries, then ignite fib_flush. 809 for stray nexthop entries, then ignite fib_flush.
810 */ 810 */
811 if (fib_sync_down(ifa->ifa_local, NULL, 0)) 811 if (fib_sync_down_addr(dev->nd_net, ifa->ifa_local))
812 fib_flush(dev->nd_net); 812 fib_flush(dev->nd_net);
813 } 813 }
814 } 814 }
@@ -898,7 +898,7 @@ static void nl_fib_lookup_exit(struct net *net)
898 898
899static void fib_disable_ip(struct net_device *dev, int force) 899static void fib_disable_ip(struct net_device *dev, int force)
900{ 900{
901 if (fib_sync_down(0, dev, force)) 901 if (fib_sync_down_dev(dev, force))
902 fib_flush(dev->nd_net); 902 fib_flush(dev->nd_net);
903 rt_cache_flush(0); 903 rt_cache_flush(0);
904 arp_ifdown(dev); 904 arp_ifdown(dev);
@@ -975,6 +975,7 @@ static struct notifier_block fib_netdev_notifier = {
975 975
976static int __net_init ip_fib_net_init(struct net *net) 976static int __net_init ip_fib_net_init(struct net *net)
977{ 977{
978 int err;
978 unsigned int i; 979 unsigned int i;
979 980
980 net->ipv4.fib_table_hash = kzalloc( 981 net->ipv4.fib_table_hash = kzalloc(
@@ -985,7 +986,14 @@ static int __net_init ip_fib_net_init(struct net *net)
985 for (i = 0; i < FIB_TABLE_HASHSZ; i++) 986 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
986 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]); 987 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
987 988
988 return fib4_rules_init(net); 989 err = fib4_rules_init(net);
990 if (err < 0)
991 goto fail;
992 return 0;
993
994fail:
995 kfree(net->ipv4.fib_table_hash);
996 return err;
989} 997}
990 998
991static void __net_exit ip_fib_net_exit(struct net *net) 999static void __net_exit ip_fib_net_exit(struct net *net)
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index a15b2f1b2721..76b9c684cccd 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -424,19 +424,43 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
424 424
425 if (fa && fa->fa_tos == tos && 425 if (fa && fa->fa_tos == tos &&
426 fa->fa_info->fib_priority == fi->fib_priority) { 426 fa->fa_info->fib_priority == fi->fib_priority) {
427 struct fib_alias *fa_orig; 427 struct fib_alias *fa_first, *fa_match;
428 428
429 err = -EEXIST; 429 err = -EEXIST;
430 if (cfg->fc_nlflags & NLM_F_EXCL) 430 if (cfg->fc_nlflags & NLM_F_EXCL)
431 goto out; 431 goto out;
432 432
433 /* We have 2 goals:
434 * 1. Find exact match for type, scope, fib_info to avoid
435 * duplicate routes
436 * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
437 */
438 fa_match = NULL;
439 fa_first = fa;
440 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
441 list_for_each_entry_continue(fa, &f->fn_alias, fa_list) {
442 if (fa->fa_tos != tos)
443 break;
444 if (fa->fa_info->fib_priority != fi->fib_priority)
445 break;
446 if (fa->fa_type == cfg->fc_type &&
447 fa->fa_scope == cfg->fc_scope &&
448 fa->fa_info == fi) {
449 fa_match = fa;
450 break;
451 }
452 }
453
433 if (cfg->fc_nlflags & NLM_F_REPLACE) { 454 if (cfg->fc_nlflags & NLM_F_REPLACE) {
434 struct fib_info *fi_drop; 455 struct fib_info *fi_drop;
435 u8 state; 456 u8 state;
436 457
437 if (fi->fib_treeref > 1) 458 fa = fa_first;
459 if (fa_match) {
460 if (fa == fa_match)
461 err = 0;
438 goto out; 462 goto out;
439 463 }
440 write_lock_bh(&fib_hash_lock); 464 write_lock_bh(&fib_hash_lock);
441 fi_drop = fa->fa_info; 465 fi_drop = fa->fa_info;
442 fa->fa_info = fi; 466 fa->fa_info = fi;
@@ -459,20 +483,11 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
459 * uses the same scope, type, and nexthop 483 * uses the same scope, type, and nexthop
460 * information. 484 * information.
461 */ 485 */
462 fa_orig = fa; 486 if (fa_match)
463 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); 487 goto out;
464 list_for_each_entry_continue(fa, &f->fn_alias, fa_list) { 488
465 if (fa->fa_tos != tos)
466 break;
467 if (fa->fa_info->fib_priority != fi->fib_priority)
468 break;
469 if (fa->fa_type == cfg->fc_type &&
470 fa->fa_scope == cfg->fc_scope &&
471 fa->fa_info == fi)
472 goto out;
473 }
474 if (!(cfg->fc_nlflags & NLM_F_APPEND)) 489 if (!(cfg->fc_nlflags & NLM_F_APPEND))
475 fa = fa_orig; 490 fa = fa_first;
476 } 491 }
477 492
478 err = -ENOENT; 493 err = -ENOENT;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c7912866d987..a13c84763d4c 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -229,6 +229,8 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
229 head = &fib_info_hash[hash]; 229 head = &fib_info_hash[hash];
230 230
231 hlist_for_each_entry(fi, node, head, fib_hash) { 231 hlist_for_each_entry(fi, node, head, fib_hash) {
232 if (fi->fib_net != nfi->fib_net)
233 continue;
232 if (fi->fib_nhs != nfi->fib_nhs) 234 if (fi->fib_nhs != nfi->fib_nhs)
233 continue; 235 continue;
234 if (nfi->fib_protocol == fi->fib_protocol && 236 if (nfi->fib_protocol == fi->fib_protocol &&
@@ -687,6 +689,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
687 struct fib_info *fi = NULL; 689 struct fib_info *fi = NULL;
688 struct fib_info *ofi; 690 struct fib_info *ofi;
689 int nhs = 1; 691 int nhs = 1;
692 struct net *net = cfg->fc_nlinfo.nl_net;
690 693
691 /* Fast check to catch the most weird cases */ 694 /* Fast check to catch the most weird cases */
692 if (fib_props[cfg->fc_type].scope > cfg->fc_scope) 695 if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
@@ -727,6 +730,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
727 goto failure; 730 goto failure;
728 fib_info_cnt++; 731 fib_info_cnt++;
729 732
733 fi->fib_net = net;
730 fi->fib_protocol = cfg->fc_protocol; 734 fi->fib_protocol = cfg->fc_protocol;
731 fi->fib_flags = cfg->fc_flags; 735 fi->fib_flags = cfg->fc_flags;
732 fi->fib_priority = cfg->fc_priority; 736 fi->fib_priority = cfg->fc_priority;
@@ -798,8 +802,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
798 if (nhs != 1 || nh->nh_gw) 802 if (nhs != 1 || nh->nh_gw)
799 goto err_inval; 803 goto err_inval;
800 nh->nh_scope = RT_SCOPE_NOWHERE; 804 nh->nh_scope = RT_SCOPE_NOWHERE;
801 nh->nh_dev = dev_get_by_index(cfg->fc_nlinfo.nl_net, 805 nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif);
802 fi->fib_nh->nh_oif);
803 err = -ENODEV; 806 err = -ENODEV;
804 if (nh->nh_dev == NULL) 807 if (nh->nh_dev == NULL)
805 goto failure; 808 goto failure;
@@ -813,8 +816,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
813 if (fi->fib_prefsrc) { 816 if (fi->fib_prefsrc) {
814 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || 817 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
815 fi->fib_prefsrc != cfg->fc_dst) 818 fi->fib_prefsrc != cfg->fc_dst)
816 if (inet_addr_type(cfg->fc_nlinfo.nl_net, 819 if (inet_addr_type(net, fi->fib_prefsrc) != RTN_LOCAL)
817 fi->fib_prefsrc) != RTN_LOCAL)
818 goto err_inval; 820 goto err_inval;
819 } 821 }
820 822
@@ -1031,70 +1033,74 @@ nla_put_failure:
1031 referring to it. 1033 referring to it.
1032 - device went down -> we must shutdown all nexthops going via it. 1034 - device went down -> we must shutdown all nexthops going via it.
1033 */ 1035 */
1034 1036int fib_sync_down_addr(struct net *net, __be32 local)
1035int fib_sync_down(__be32 local, struct net_device *dev, int force)
1036{ 1037{
1037 int ret = 0; 1038 int ret = 0;
1038 int scope = RT_SCOPE_NOWHERE; 1039 unsigned int hash = fib_laddr_hashfn(local);
1039 1040 struct hlist_head *head = &fib_info_laddrhash[hash];
1040 if (force) 1041 struct hlist_node *node;
1041 scope = -1; 1042 struct fib_info *fi;
1042 1043
1043 if (local && fib_info_laddrhash) { 1044 if (fib_info_laddrhash == NULL || local == 0)
1044 unsigned int hash = fib_laddr_hashfn(local); 1045 return 0;
1045 struct hlist_head *head = &fib_info_laddrhash[hash];
1046 struct hlist_node *node;
1047 struct fib_info *fi;
1048 1046
1049 hlist_for_each_entry(fi, node, head, fib_lhash) { 1047 hlist_for_each_entry(fi, node, head, fib_lhash) {
1050 if (fi->fib_prefsrc == local) { 1048 if (fi->fib_net != net)
1051 fi->fib_flags |= RTNH_F_DEAD; 1049 continue;
1052 ret++; 1050 if (fi->fib_prefsrc == local) {
1053 } 1051 fi->fib_flags |= RTNH_F_DEAD;
1052 ret++;
1054 } 1053 }
1055 } 1054 }
1055 return ret;
1056}
1056 1057
1057 if (dev) { 1058int fib_sync_down_dev(struct net_device *dev, int force)
1058 struct fib_info *prev_fi = NULL; 1059{
1059 unsigned int hash = fib_devindex_hashfn(dev->ifindex); 1060 int ret = 0;
1060 struct hlist_head *head = &fib_info_devhash[hash]; 1061 int scope = RT_SCOPE_NOWHERE;
1061 struct hlist_node *node; 1062 struct fib_info *prev_fi = NULL;
1062 struct fib_nh *nh; 1063 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1064 struct hlist_head *head = &fib_info_devhash[hash];
1065 struct hlist_node *node;
1066 struct fib_nh *nh;
1063 1067
1064 hlist_for_each_entry(nh, node, head, nh_hash) { 1068 if (force)
1065 struct fib_info *fi = nh->nh_parent; 1069 scope = -1;
1066 int dead;
1067 1070
1068 BUG_ON(!fi->fib_nhs); 1071 hlist_for_each_entry(nh, node, head, nh_hash) {
1069 if (nh->nh_dev != dev || fi == prev_fi) 1072 struct fib_info *fi = nh->nh_parent;
1070 continue; 1073 int dead;
1071 prev_fi = fi; 1074
1072 dead = 0; 1075 BUG_ON(!fi->fib_nhs);
1073 change_nexthops(fi) { 1076 if (nh->nh_dev != dev || fi == prev_fi)
1074 if (nh->nh_flags&RTNH_F_DEAD) 1077 continue;
1075 dead++; 1078 prev_fi = fi;
1076 else if (nh->nh_dev == dev && 1079 dead = 0;
1077 nh->nh_scope != scope) { 1080 change_nexthops(fi) {
1078 nh->nh_flags |= RTNH_F_DEAD; 1081 if (nh->nh_flags&RTNH_F_DEAD)
1082 dead++;
1083 else if (nh->nh_dev == dev &&
1084 nh->nh_scope != scope) {
1085 nh->nh_flags |= RTNH_F_DEAD;
1079#ifdef CONFIG_IP_ROUTE_MULTIPATH 1086#ifdef CONFIG_IP_ROUTE_MULTIPATH
1080 spin_lock_bh(&fib_multipath_lock); 1087 spin_lock_bh(&fib_multipath_lock);
1081 fi->fib_power -= nh->nh_power; 1088 fi->fib_power -= nh->nh_power;
1082 nh->nh_power = 0; 1089 nh->nh_power = 0;
1083 spin_unlock_bh(&fib_multipath_lock); 1090 spin_unlock_bh(&fib_multipath_lock);
1084#endif 1091#endif
1085 dead++; 1092 dead++;
1086 } 1093 }
1087#ifdef CONFIG_IP_ROUTE_MULTIPATH 1094#ifdef CONFIG_IP_ROUTE_MULTIPATH
1088 if (force > 1 && nh->nh_dev == dev) { 1095 if (force > 1 && nh->nh_dev == dev) {
1089 dead = fi->fib_nhs; 1096 dead = fi->fib_nhs;
1090 break; 1097 break;
1091 }
1092#endif
1093 } endfor_nexthops(fi)
1094 if (dead == fi->fib_nhs) {
1095 fi->fib_flags |= RTNH_F_DEAD;
1096 ret++;
1097 } 1098 }
1099#endif
1100 } endfor_nexthops(fi)
1101 if (dead == fi->fib_nhs) {
1102 fi->fib_flags |= RTNH_F_DEAD;
1103 ret++;
1098 } 1104 }
1099 } 1105 }
1100 1106
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index f2f47033f31f..35851c96bdfb 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1205,20 +1205,45 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
1205 * and we need to allocate a new one of those as well. 1205 * and we need to allocate a new one of those as well.
1206 */ 1206 */
1207 1207
1208 if (fa && fa->fa_info->fib_priority == fi->fib_priority) { 1208 if (fa && fa->fa_tos == tos &&
1209 struct fib_alias *fa_orig; 1209 fa->fa_info->fib_priority == fi->fib_priority) {
1210 struct fib_alias *fa_first, *fa_match;
1210 1211
1211 err = -EEXIST; 1212 err = -EEXIST;
1212 if (cfg->fc_nlflags & NLM_F_EXCL) 1213 if (cfg->fc_nlflags & NLM_F_EXCL)
1213 goto out; 1214 goto out;
1214 1215
1216 /* We have 2 goals:
1217 * 1. Find exact match for type, scope, fib_info to avoid
1218 * duplicate routes
1219 * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
1220 */
1221 fa_match = NULL;
1222 fa_first = fa;
1223 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
1224 list_for_each_entry_continue(fa, fa_head, fa_list) {
1225 if (fa->fa_tos != tos)
1226 break;
1227 if (fa->fa_info->fib_priority != fi->fib_priority)
1228 break;
1229 if (fa->fa_type == cfg->fc_type &&
1230 fa->fa_scope == cfg->fc_scope &&
1231 fa->fa_info == fi) {
1232 fa_match = fa;
1233 break;
1234 }
1235 }
1236
1215 if (cfg->fc_nlflags & NLM_F_REPLACE) { 1237 if (cfg->fc_nlflags & NLM_F_REPLACE) {
1216 struct fib_info *fi_drop; 1238 struct fib_info *fi_drop;
1217 u8 state; 1239 u8 state;
1218 1240
1219 if (fi->fib_treeref > 1) 1241 fa = fa_first;
1242 if (fa_match) {
1243 if (fa == fa_match)
1244 err = 0;
1220 goto out; 1245 goto out;
1221 1246 }
1222 err = -ENOBUFS; 1247 err = -ENOBUFS;
1223 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 1248 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
1224 if (new_fa == NULL) 1249 if (new_fa == NULL)
@@ -1230,7 +1255,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
1230 new_fa->fa_type = cfg->fc_type; 1255 new_fa->fa_type = cfg->fc_type;
1231 new_fa->fa_scope = cfg->fc_scope; 1256 new_fa->fa_scope = cfg->fc_scope;
1232 state = fa->fa_state; 1257 state = fa->fa_state;
1233 new_fa->fa_state &= ~FA_S_ACCESSED; 1258 new_fa->fa_state = state & ~FA_S_ACCESSED;
1234 1259
1235 list_replace_rcu(&fa->fa_list, &new_fa->fa_list); 1260 list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
1236 alias_free_mem_rcu(fa); 1261 alias_free_mem_rcu(fa);
@@ -1247,20 +1272,11 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
1247 * uses the same scope, type, and nexthop 1272 * uses the same scope, type, and nexthop
1248 * information. 1273 * information.
1249 */ 1274 */
1250 fa_orig = fa; 1275 if (fa_match)
1251 list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) { 1276 goto out;
1252 if (fa->fa_tos != tos)
1253 break;
1254 if (fa->fa_info->fib_priority != fi->fib_priority)
1255 break;
1256 if (fa->fa_type == cfg->fc_type &&
1257 fa->fa_scope == cfg->fc_scope &&
1258 fa->fa_info == fi)
1259 goto out;
1260 }
1261 1277
1262 if (!(cfg->fc_nlflags & NLM_F_APPEND)) 1278 if (!(cfg->fc_nlflags & NLM_F_APPEND))
1263 fa = fa_orig; 1279 fa = fa_first;
1264 } 1280 }
1265 err = -ENOENT; 1281 err = -ENOENT;
1266 if (!(cfg->fc_nlflags & NLM_F_CREATE)) 1282 if (!(cfg->fc_nlflags & NLM_F_CREATE))
@@ -1600,9 +1616,8 @@ static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
1600 pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t); 1616 pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
1601 1617
1602 fa_to_delete = NULL; 1618 fa_to_delete = NULL;
1603 fa_head = fa->fa_list.prev; 1619 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
1604 1620 list_for_each_entry_continue(fa, fa_head, fa_list) {
1605 list_for_each_entry(fa, fa_head, fa_list) {
1606 struct fib_info *fi = fa->fa_info; 1621 struct fib_info *fi = fa->fa_info;
1607 1622
1608 if (fa->fa_tos != tos) 1623 if (fa->fa_tos != tos)
@@ -1743,6 +1758,19 @@ static struct leaf *trie_nextleaf(struct leaf *l)
1743 return leaf_walk_rcu(p, c); 1758 return leaf_walk_rcu(p, c);
1744} 1759}
1745 1760
1761static struct leaf *trie_leafindex(struct trie *t, int index)
1762{
1763 struct leaf *l = trie_firstleaf(t);
1764
1765 while (index-- > 0) {
1766 l = trie_nextleaf(l);
1767 if (!l)
1768 break;
1769 }
1770 return l;
1771}
1772
1773
1746/* 1774/*
1747 * Caller must hold RTNL. 1775 * Caller must hold RTNL.
1748 */ 1776 */
@@ -1848,7 +1876,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
1848 struct fib_alias *fa; 1876 struct fib_alias *fa;
1849 __be32 xkey = htonl(key); 1877 __be32 xkey = htonl(key);
1850 1878
1851 s_i = cb->args[4]; 1879 s_i = cb->args[5];
1852 i = 0; 1880 i = 0;
1853 1881
1854 /* rcu_read_lock is hold by caller */ 1882 /* rcu_read_lock is hold by caller */
@@ -1869,12 +1897,12 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
1869 plen, 1897 plen,
1870 fa->fa_tos, 1898 fa->fa_tos,
1871 fa->fa_info, NLM_F_MULTI) < 0) { 1899 fa->fa_info, NLM_F_MULTI) < 0) {
1872 cb->args[4] = i; 1900 cb->args[5] = i;
1873 return -1; 1901 return -1;
1874 } 1902 }
1875 i++; 1903 i++;
1876 } 1904 }
1877 cb->args[4] = i; 1905 cb->args[5] = i;
1878 return skb->len; 1906 return skb->len;
1879} 1907}
1880 1908
@@ -1885,7 +1913,7 @@ static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb,
1885 struct hlist_node *node; 1913 struct hlist_node *node;
1886 int i, s_i; 1914 int i, s_i;
1887 1915
1888 s_i = cb->args[3]; 1916 s_i = cb->args[4];
1889 i = 0; 1917 i = 0;
1890 1918
1891 /* rcu_read_lock is hold by caller */ 1919 /* rcu_read_lock is hold by caller */
@@ -1896,19 +1924,19 @@ static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb,
1896 } 1924 }
1897 1925
1898 if (i > s_i) 1926 if (i > s_i)
1899 cb->args[4] = 0; 1927 cb->args[5] = 0;
1900 1928
1901 if (list_empty(&li->falh)) 1929 if (list_empty(&li->falh))
1902 continue; 1930 continue;
1903 1931
1904 if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) { 1932 if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) {
1905 cb->args[3] = i; 1933 cb->args[4] = i;
1906 return -1; 1934 return -1;
1907 } 1935 }
1908 i++; 1936 i++;
1909 } 1937 }
1910 1938
1911 cb->args[3] = i; 1939 cb->args[4] = i;
1912 return skb->len; 1940 return skb->len;
1913} 1941}
1914 1942
@@ -1918,35 +1946,37 @@ static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb,
1918 struct leaf *l; 1946 struct leaf *l;
1919 struct trie *t = (struct trie *) tb->tb_data; 1947 struct trie *t = (struct trie *) tb->tb_data;
1920 t_key key = cb->args[2]; 1948 t_key key = cb->args[2];
1949 int count = cb->args[3];
1921 1950
1922 rcu_read_lock(); 1951 rcu_read_lock();
1923 /* Dump starting at last key. 1952 /* Dump starting at last key.
1924 * Note: 0.0.0.0/0 (ie default) is first key. 1953 * Note: 0.0.0.0/0 (ie default) is first key.
1925 */ 1954 */
1926 if (!key) 1955 if (count == 0)
1927 l = trie_firstleaf(t); 1956 l = trie_firstleaf(t);
1928 else { 1957 else {
1958 /* Normally, continue from last key, but if that is missing
1959 * fallback to using slow rescan
1960 */
1929 l = fib_find_node(t, key); 1961 l = fib_find_node(t, key);
1930 if (!l) { 1962 if (!l)
1931 /* The table changed during the dump, rather than 1963 l = trie_leafindex(t, count);
1932 * giving partial data, just make application retry.
1933 */
1934 rcu_read_unlock();
1935 return -EBUSY;
1936 }
1937 } 1964 }
1938 1965
1939 while (l) { 1966 while (l) {
1940 cb->args[2] = l->key; 1967 cb->args[2] = l->key;
1941 if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) { 1968 if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
1969 cb->args[3] = count;
1942 rcu_read_unlock(); 1970 rcu_read_unlock();
1943 return -1; 1971 return -1;
1944 } 1972 }
1945 1973
1974 ++count;
1946 l = trie_nextleaf(l); 1975 l = trie_nextleaf(l);
1947 memset(&cb->args[3], 0, 1976 memset(&cb->args[4], 0,
1948 sizeof(cb->args) - 3*sizeof(cb->args[0])); 1977 sizeof(cb->args) - 4*sizeof(cb->args[0]));
1949 } 1978 }
1979 cb->args[3] = count;
1950 rcu_read_unlock(); 1980 rcu_read_unlock();
1951 1981
1952 return skb->len; 1982 return skb->len;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 7801cceb2d1b..de5a41de191a 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -87,6 +87,7 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo,
87 struct hlist_node *node; 87 struct hlist_node *node;
88 struct inet_bind_bucket *tb; 88 struct inet_bind_bucket *tb;
89 int ret; 89 int ret;
90 struct net *net = sk->sk_net;
90 91
91 local_bh_disable(); 92 local_bh_disable();
92 if (!snum) { 93 if (!snum) {
@@ -100,7 +101,7 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo,
100 head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; 101 head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)];
101 spin_lock(&head->lock); 102 spin_lock(&head->lock);
102 inet_bind_bucket_for_each(tb, node, &head->chain) 103 inet_bind_bucket_for_each(tb, node, &head->chain)
103 if (tb->port == rover) 104 if (tb->ib_net == net && tb->port == rover)
104 goto next; 105 goto next;
105 break; 106 break;
106 next: 107 next:
@@ -127,7 +128,7 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo,
127 head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)]; 128 head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)];
128 spin_lock(&head->lock); 129 spin_lock(&head->lock);
129 inet_bind_bucket_for_each(tb, node, &head->chain) 130 inet_bind_bucket_for_each(tb, node, &head->chain)
130 if (tb->port == snum) 131 if (tb->ib_net == net && tb->port == snum)
131 goto tb_found; 132 goto tb_found;
132 } 133 }
133 tb = NULL; 134 tb = NULL;
@@ -147,7 +148,8 @@ tb_found:
147 } 148 }
148tb_not_found: 149tb_not_found:
149 ret = 1; 150 ret = 1;
150 if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL) 151 if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep,
152 net, head, snum)) == NULL)
151 goto fail_unlock; 153 goto fail_unlock;
152 if (hlist_empty(&tb->owners)) { 154 if (hlist_empty(&tb->owners)) {
153 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) 155 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 605ed2cd7972..da97695e7096 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -259,20 +259,22 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
259 const struct inet_diag_handler *handler; 259 const struct inet_diag_handler *handler;
260 260
261 handler = inet_diag_lock_handler(nlh->nlmsg_type); 261 handler = inet_diag_lock_handler(nlh->nlmsg_type);
262 if (!handler) 262 if (IS_ERR(handler)) {
263 return -ENOENT; 263 err = PTR_ERR(handler);
264 goto unlock;
265 }
264 266
265 hashinfo = handler->idiag_hashinfo; 267 hashinfo = handler->idiag_hashinfo;
266 err = -EINVAL; 268 err = -EINVAL;
267 269
268 if (req->idiag_family == AF_INET) { 270 if (req->idiag_family == AF_INET) {
269 sk = inet_lookup(hashinfo, req->id.idiag_dst[0], 271 sk = inet_lookup(&init_net, hashinfo, req->id.idiag_dst[0],
270 req->id.idiag_dport, req->id.idiag_src[0], 272 req->id.idiag_dport, req->id.idiag_src[0],
271 req->id.idiag_sport, req->id.idiag_if); 273 req->id.idiag_sport, req->id.idiag_if);
272 } 274 }
273#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 275#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
274 else if (req->idiag_family == AF_INET6) { 276 else if (req->idiag_family == AF_INET6) {
275 sk = inet6_lookup(hashinfo, 277 sk = inet6_lookup(&init_net, hashinfo,
276 (struct in6_addr *)req->id.idiag_dst, 278 (struct in6_addr *)req->id.idiag_dst,
277 req->id.idiag_dport, 279 req->id.idiag_dport,
278 (struct in6_addr *)req->id.idiag_src, 280 (struct in6_addr *)req->id.idiag_src,
@@ -708,8 +710,8 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
708 struct inet_hashinfo *hashinfo; 710 struct inet_hashinfo *hashinfo;
709 711
710 handler = inet_diag_lock_handler(cb->nlh->nlmsg_type); 712 handler = inet_diag_lock_handler(cb->nlh->nlmsg_type);
711 if (!handler) 713 if (IS_ERR(handler))
712 goto no_handler; 714 goto unlock;
713 715
714 hashinfo = handler->idiag_hashinfo; 716 hashinfo = handler->idiag_hashinfo;
715 717
@@ -838,7 +840,6 @@ done:
838 cb->args[2] = num; 840 cb->args[2] = num;
839unlock: 841unlock:
840 inet_diag_unlock_handler(handler); 842 inet_diag_unlock_handler(handler);
841no_handler:
842 return skb->len; 843 return skb->len;
843} 844}
844 845
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 619c63c6948a..48d45008f749 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -28,12 +28,14 @@
28 * The bindhash mutex for snum's hash chain must be held here. 28 * The bindhash mutex for snum's hash chain must be held here.
29 */ 29 */
30struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, 30struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
31 struct net *net,
31 struct inet_bind_hashbucket *head, 32 struct inet_bind_hashbucket *head,
32 const unsigned short snum) 33 const unsigned short snum)
33{ 34{
34 struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); 35 struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
35 36
36 if (tb != NULL) { 37 if (tb != NULL) {
38 tb->ib_net = net;
37 tb->port = snum; 39 tb->port = snum;
38 tb->fastreuse = 0; 40 tb->fastreuse = 0;
39 INIT_HLIST_HEAD(&tb->owners); 41 INIT_HLIST_HEAD(&tb->owners);
@@ -125,7 +127,8 @@ EXPORT_SYMBOL(inet_listen_wlock);
125 * remote address for the connection. So always assume those are both 127 * remote address for the connection. So always assume those are both
126 * wildcarded during the search since they can never be otherwise. 128 * wildcarded during the search since they can never be otherwise.
127 */ 129 */
128static struct sock *inet_lookup_listener_slow(const struct hlist_head *head, 130static struct sock *inet_lookup_listener_slow(struct net *net,
131 const struct hlist_head *head,
129 const __be32 daddr, 132 const __be32 daddr,
130 const unsigned short hnum, 133 const unsigned short hnum,
131 const int dif) 134 const int dif)
@@ -137,7 +140,8 @@ static struct sock *inet_lookup_listener_slow(const struct hlist_head *head,
137 sk_for_each(sk, node, head) { 140 sk_for_each(sk, node, head) {
138 const struct inet_sock *inet = inet_sk(sk); 141 const struct inet_sock *inet = inet_sk(sk);
139 142
140 if (inet->num == hnum && !ipv6_only_sock(sk)) { 143 if (sk->sk_net == net && inet->num == hnum &&
144 !ipv6_only_sock(sk)) {
141 const __be32 rcv_saddr = inet->rcv_saddr; 145 const __be32 rcv_saddr = inet->rcv_saddr;
142 int score = sk->sk_family == PF_INET ? 1 : 0; 146 int score = sk->sk_family == PF_INET ? 1 : 0;
143 147
@@ -163,7 +167,8 @@ static struct sock *inet_lookup_listener_slow(const struct hlist_head *head,
163} 167}
164 168
165/* Optimize the common listener case. */ 169/* Optimize the common listener case. */
166struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo, 170struct sock *__inet_lookup_listener(struct net *net,
171 struct inet_hashinfo *hashinfo,
167 const __be32 daddr, const unsigned short hnum, 172 const __be32 daddr, const unsigned short hnum,
168 const int dif) 173 const int dif)
169{ 174{
@@ -178,9 +183,9 @@ struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo,
178 if (inet->num == hnum && !sk->sk_node.next && 183 if (inet->num == hnum && !sk->sk_node.next &&
179 (!inet->rcv_saddr || inet->rcv_saddr == daddr) && 184 (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
180 (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && 185 (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
181 !sk->sk_bound_dev_if) 186 !sk->sk_bound_dev_if && sk->sk_net == net)
182 goto sherry_cache; 187 goto sherry_cache;
183 sk = inet_lookup_listener_slow(head, daddr, hnum, dif); 188 sk = inet_lookup_listener_slow(net, head, daddr, hnum, dif);
184 } 189 }
185 if (sk) { 190 if (sk) {
186sherry_cache: 191sherry_cache:
@@ -191,7 +196,8 @@ sherry_cache:
191} 196}
192EXPORT_SYMBOL_GPL(__inet_lookup_listener); 197EXPORT_SYMBOL_GPL(__inet_lookup_listener);
193 198
194struct sock * __inet_lookup_established(struct inet_hashinfo *hashinfo, 199struct sock * __inet_lookup_established(struct net *net,
200 struct inet_hashinfo *hashinfo,
195 const __be32 saddr, const __be16 sport, 201 const __be32 saddr, const __be16 sport,
196 const __be32 daddr, const u16 hnum, 202 const __be32 daddr, const u16 hnum,
197 const int dif) 203 const int dif)
@@ -210,13 +216,15 @@ struct sock * __inet_lookup_established(struct inet_hashinfo *hashinfo,
210 prefetch(head->chain.first); 216 prefetch(head->chain.first);
211 read_lock(lock); 217 read_lock(lock);
212 sk_for_each(sk, node, &head->chain) { 218 sk_for_each(sk, node, &head->chain) {
213 if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif)) 219 if (INET_MATCH(sk, net, hash, acookie,
220 saddr, daddr, ports, dif))
214 goto hit; /* You sunk my battleship! */ 221 goto hit; /* You sunk my battleship! */
215 } 222 }
216 223
217 /* Must check for a TIME_WAIT'er before going to listener hash. */ 224 /* Must check for a TIME_WAIT'er before going to listener hash. */
218 sk_for_each(sk, node, &head->twchain) { 225 sk_for_each(sk, node, &head->twchain) {
219 if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif)) 226 if (INET_TW_MATCH(sk, net, hash, acookie,
227 saddr, daddr, ports, dif))
220 goto hit; 228 goto hit;
221 } 229 }
222 sk = NULL; 230 sk = NULL;
@@ -247,6 +255,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
247 struct sock *sk2; 255 struct sock *sk2;
248 const struct hlist_node *node; 256 const struct hlist_node *node;
249 struct inet_timewait_sock *tw; 257 struct inet_timewait_sock *tw;
258 struct net *net = sk->sk_net;
250 259
251 prefetch(head->chain.first); 260 prefetch(head->chain.first);
252 write_lock(lock); 261 write_lock(lock);
@@ -255,7 +264,8 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
255 sk_for_each(sk2, node, &head->twchain) { 264 sk_for_each(sk2, node, &head->twchain) {
256 tw = inet_twsk(sk2); 265 tw = inet_twsk(sk2);
257 266
258 if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) { 267 if (INET_TW_MATCH(sk2, net, hash, acookie,
268 saddr, daddr, ports, dif)) {
259 if (twsk_unique(sk, sk2, twp)) 269 if (twsk_unique(sk, sk2, twp))
260 goto unique; 270 goto unique;
261 else 271 else
@@ -266,7 +276,8 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
266 276
267 /* And established part... */ 277 /* And established part... */
268 sk_for_each(sk2, node, &head->chain) { 278 sk_for_each(sk2, node, &head->chain) {
269 if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) 279 if (INET_MATCH(sk2, net, hash, acookie,
280 saddr, daddr, ports, dif))
270 goto not_unique; 281 goto not_unique;
271 } 282 }
272 283
@@ -348,17 +359,18 @@ void __inet_hash(struct inet_hashinfo *hashinfo, struct sock *sk)
348} 359}
349EXPORT_SYMBOL_GPL(__inet_hash); 360EXPORT_SYMBOL_GPL(__inet_hash);
350 361
351/* 362int __inet_hash_connect(struct inet_timewait_death_row *death_row,
352 * Bind a port for a connect operation and hash it. 363 struct sock *sk,
353 */ 364 int (*check_established)(struct inet_timewait_death_row *,
354int inet_hash_connect(struct inet_timewait_death_row *death_row, 365 struct sock *, __u16, struct inet_timewait_sock **),
355 struct sock *sk) 366 void (*hash)(struct inet_hashinfo *, struct sock *))
356{ 367{
357 struct inet_hashinfo *hinfo = death_row->hashinfo; 368 struct inet_hashinfo *hinfo = death_row->hashinfo;
358 const unsigned short snum = inet_sk(sk)->num; 369 const unsigned short snum = inet_sk(sk)->num;
359 struct inet_bind_hashbucket *head; 370 struct inet_bind_hashbucket *head;
360 struct inet_bind_bucket *tb; 371 struct inet_bind_bucket *tb;
361 int ret; 372 int ret;
373 struct net *net = sk->sk_net;
362 374
363 if (!snum) { 375 if (!snum) {
364 int i, remaining, low, high, port; 376 int i, remaining, low, high, port;
@@ -381,19 +393,19 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row,
381 * unique enough. 393 * unique enough.
382 */ 394 */
383 inet_bind_bucket_for_each(tb, node, &head->chain) { 395 inet_bind_bucket_for_each(tb, node, &head->chain) {
384 if (tb->port == port) { 396 if (tb->ib_net == net && tb->port == port) {
385 BUG_TRAP(!hlist_empty(&tb->owners)); 397 BUG_TRAP(!hlist_empty(&tb->owners));
386 if (tb->fastreuse >= 0) 398 if (tb->fastreuse >= 0)
387 goto next_port; 399 goto next_port;
388 if (!__inet_check_established(death_row, 400 if (!check_established(death_row, sk,
389 sk, port, 401 port, &tw))
390 &tw))
391 goto ok; 402 goto ok;
392 goto next_port; 403 goto next_port;
393 } 404 }
394 } 405 }
395 406
396 tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, port); 407 tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
408 net, head, port);
397 if (!tb) { 409 if (!tb) {
398 spin_unlock(&head->lock); 410 spin_unlock(&head->lock);
399 break; 411 break;
@@ -415,7 +427,7 @@ ok:
415 inet_bind_hash(sk, tb, port); 427 inet_bind_hash(sk, tb, port);
416 if (sk_unhashed(sk)) { 428 if (sk_unhashed(sk)) {
417 inet_sk(sk)->sport = htons(port); 429 inet_sk(sk)->sport = htons(port);
418 __inet_hash_nolisten(hinfo, sk); 430 hash(hinfo, sk);
419 } 431 }
420 spin_unlock(&head->lock); 432 spin_unlock(&head->lock);
421 433
@@ -432,17 +444,28 @@ ok:
432 tb = inet_csk(sk)->icsk_bind_hash; 444 tb = inet_csk(sk)->icsk_bind_hash;
433 spin_lock_bh(&head->lock); 445 spin_lock_bh(&head->lock);
434 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { 446 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
435 __inet_hash_nolisten(hinfo, sk); 447 hash(hinfo, sk);
436 spin_unlock_bh(&head->lock); 448 spin_unlock_bh(&head->lock);
437 return 0; 449 return 0;
438 } else { 450 } else {
439 spin_unlock(&head->lock); 451 spin_unlock(&head->lock);
440 /* No definite answer... Walk to established hash table */ 452 /* No definite answer... Walk to established hash table */
441 ret = __inet_check_established(death_row, sk, snum, NULL); 453 ret = check_established(death_row, sk, snum, NULL);
442out: 454out:
443 local_bh_enable(); 455 local_bh_enable();
444 return ret; 456 return ret;
445 } 457 }
446} 458}
459EXPORT_SYMBOL_GPL(__inet_hash_connect);
460
461/*
462 * Bind a port for a connect operation and hash it.
463 */
464int inet_hash_connect(struct inet_timewait_death_row *death_row,
465 struct sock *sk)
466{
467 return __inet_hash_connect(death_row, sk,
468 __inet_check_established, __inet_hash_nolisten);
469}
447 470
448EXPORT_SYMBOL_GPL(inet_hash_connect); 471EXPORT_SYMBOL_GPL(inet_hash_connect);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 18070ca65771..341779e685d9 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -168,6 +168,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
168 } 168 }
169 169
170 skb->priority = sk->sk_priority; 170 skb->priority = sk->sk_priority;
171 skb->mark = sk->sk_mark;
171 172
172 /* Send it out. */ 173 /* Send it out. */
173 return ip_local_out(skb); 174 return ip_local_out(skb);
@@ -385,6 +386,7 @@ packet_routed:
385 (skb_shinfo(skb)->gso_segs ?: 1) - 1); 386 (skb_shinfo(skb)->gso_segs ?: 1) - 1);
386 387
387 skb->priority = sk->sk_priority; 388 skb->priority = sk->sk_priority;
389 skb->mark = sk->sk_mark;
388 390
389 return ip_local_out(skb); 391 return ip_local_out(skb);
390 392
@@ -476,6 +478,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
476 if (skb_shinfo(skb)->frag_list) { 478 if (skb_shinfo(skb)->frag_list) {
477 struct sk_buff *frag; 479 struct sk_buff *frag;
478 int first_len = skb_pagelen(skb); 480 int first_len = skb_pagelen(skb);
481 int truesizes = 0;
479 482
480 if (first_len - hlen > mtu || 483 if (first_len - hlen > mtu ||
481 ((first_len - hlen) & 7) || 484 ((first_len - hlen) & 7) ||
@@ -499,7 +502,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
499 sock_hold(skb->sk); 502 sock_hold(skb->sk);
500 frag->sk = skb->sk; 503 frag->sk = skb->sk;
501 frag->destructor = sock_wfree; 504 frag->destructor = sock_wfree;
502 skb->truesize -= frag->truesize; 505 truesizes += frag->truesize;
503 } 506 }
504 } 507 }
505 508
@@ -510,6 +513,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
510 frag = skb_shinfo(skb)->frag_list; 513 frag = skb_shinfo(skb)->frag_list;
511 skb_shinfo(skb)->frag_list = NULL; 514 skb_shinfo(skb)->frag_list = NULL;
512 skb->data_len = first_len - skb_headlen(skb); 515 skb->data_len = first_len - skb_headlen(skb);
516 skb->truesize -= truesizes;
513 skb->len = first_len; 517 skb->len = first_len;
514 iph->tot_len = htons(first_len); 518 iph->tot_len = htons(first_len);
515 iph->frag_off = htons(IP_MF); 519 iph->frag_off = htons(IP_MF);
@@ -1284,6 +1288,7 @@ int ip_push_pending_frames(struct sock *sk)
1284 iph->daddr = rt->rt_dst; 1288 iph->daddr = rt->rt_dst;
1285 1289
1286 skb->priority = sk->sk_priority; 1290 skb->priority = sk->sk_priority;
1291 skb->mark = sk->sk_mark;
1287 skb->dst = dst_clone(&rt->u.dst); 1292 skb->dst = dst_clone(&rt->u.dst);
1288 1293
1289 if (iph->protocol == IPPROTO_ICMP) 1294 if (iph->protocol == IPPROTO_ICMP)
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index f4af99ad8fdb..ae1f45fc23b9 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -74,6 +74,7 @@ out:
74 74
75static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb) 75static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb)
76{ 76{
77 int nexthdr;
77 int err = -ENOMEM; 78 int err = -ENOMEM;
78 struct ip_comp_hdr *ipch; 79 struct ip_comp_hdr *ipch;
79 80
@@ -84,13 +85,15 @@ static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb)
84 85
85 /* Remove ipcomp header and decompress original payload */ 86 /* Remove ipcomp header and decompress original payload */
86 ipch = (void *)skb->data; 87 ipch = (void *)skb->data;
88 nexthdr = ipch->nexthdr;
89
87 skb->transport_header = skb->network_header + sizeof(*ipch); 90 skb->transport_header = skb->network_header + sizeof(*ipch);
88 __skb_pull(skb, sizeof(*ipch)); 91 __skb_pull(skb, sizeof(*ipch));
89 err = ipcomp_decompress(x, skb); 92 err = ipcomp_decompress(x, skb);
90 if (err) 93 if (err)
91 goto out; 94 goto out;
92 95
93 err = ipch->nexthdr; 96 err = nexthdr;
94 97
95out: 98out:
96 return err; 99 return err;
@@ -434,7 +437,7 @@ error:
434 goto out; 437 goto out;
435} 438}
436 439
437static struct xfrm_type ipcomp_type = { 440static const struct xfrm_type ipcomp_type = {
438 .description = "IPCOMP4", 441 .description = "IPCOMP4",
439 .owner = THIS_MODULE, 442 .owner = THIS_MODULE,
440 .proto = IPPROTO_COMP, 443 .proto = IPPROTO_COMP,
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index b4a810c28ac8..a7591ce344d2 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -22,6 +22,7 @@
22#include <linux/mutex.h> 22#include <linux/mutex.h>
23#include <linux/err.h> 23#include <linux/err.h>
24#include <net/compat.h> 24#include <net/compat.h>
25#include <net/sock.h>
25#include <asm/uaccess.h> 26#include <asm/uaccess.h>
26 27
27#include <linux/netfilter/x_tables.h> 28#include <linux/netfilter/x_tables.h>
@@ -850,7 +851,7 @@ static int compat_table_info(const struct xt_table_info *info,
850} 851}
851#endif 852#endif
852 853
853static int get_info(void __user *user, int *len, int compat) 854static int get_info(struct net *net, void __user *user, int *len, int compat)
854{ 855{
855 char name[ARPT_TABLE_MAXNAMELEN]; 856 char name[ARPT_TABLE_MAXNAMELEN];
856 struct arpt_table *t; 857 struct arpt_table *t;
@@ -870,7 +871,7 @@ static int get_info(void __user *user, int *len, int compat)
870 if (compat) 871 if (compat)
871 xt_compat_lock(NF_ARP); 872 xt_compat_lock(NF_ARP);
872#endif 873#endif
873 t = try_then_request_module(xt_find_table_lock(NF_ARP, name), 874 t = try_then_request_module(xt_find_table_lock(net, NF_ARP, name),
874 "arptable_%s", name); 875 "arptable_%s", name);
875 if (t && !IS_ERR(t)) { 876 if (t && !IS_ERR(t)) {
876 struct arpt_getinfo info; 877 struct arpt_getinfo info;
@@ -908,7 +909,8 @@ static int get_info(void __user *user, int *len, int compat)
908 return ret; 909 return ret;
909} 910}
910 911
911static int get_entries(struct arpt_get_entries __user *uptr, int *len) 912static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
913 int *len)
912{ 914{
913 int ret; 915 int ret;
914 struct arpt_get_entries get; 916 struct arpt_get_entries get;
@@ -926,7 +928,7 @@ static int get_entries(struct arpt_get_entries __user *uptr, int *len)
926 return -EINVAL; 928 return -EINVAL;
927 } 929 }
928 930
929 t = xt_find_table_lock(NF_ARP, get.name); 931 t = xt_find_table_lock(net, NF_ARP, get.name);
930 if (t && !IS_ERR(t)) { 932 if (t && !IS_ERR(t)) {
931 struct xt_table_info *private = t->private; 933 struct xt_table_info *private = t->private;
932 duprintf("t->private->number = %u\n", 934 duprintf("t->private->number = %u\n",
@@ -947,7 +949,8 @@ static int get_entries(struct arpt_get_entries __user *uptr, int *len)
947 return ret; 949 return ret;
948} 950}
949 951
950static int __do_replace(const char *name, unsigned int valid_hooks, 952static int __do_replace(struct net *net, const char *name,
953 unsigned int valid_hooks,
951 struct xt_table_info *newinfo, 954 struct xt_table_info *newinfo,
952 unsigned int num_counters, 955 unsigned int num_counters,
953 void __user *counters_ptr) 956 void __user *counters_ptr)
@@ -966,7 +969,7 @@ static int __do_replace(const char *name, unsigned int valid_hooks,
966 goto out; 969 goto out;
967 } 970 }
968 971
969 t = try_then_request_module(xt_find_table_lock(NF_ARP, name), 972 t = try_then_request_module(xt_find_table_lock(net, NF_ARP, name),
970 "arptable_%s", name); 973 "arptable_%s", name);
971 if (!t || IS_ERR(t)) { 974 if (!t || IS_ERR(t)) {
972 ret = t ? PTR_ERR(t) : -ENOENT; 975 ret = t ? PTR_ERR(t) : -ENOENT;
@@ -1019,7 +1022,7 @@ static int __do_replace(const char *name, unsigned int valid_hooks,
1019 return ret; 1022 return ret;
1020} 1023}
1021 1024
1022static int do_replace(void __user *user, unsigned int len) 1025static int do_replace(struct net *net, void __user *user, unsigned int len)
1023{ 1026{
1024 int ret; 1027 int ret;
1025 struct arpt_replace tmp; 1028 struct arpt_replace tmp;
@@ -1053,7 +1056,7 @@ static int do_replace(void __user *user, unsigned int len)
1053 1056
1054 duprintf("arp_tables: Translated table\n"); 1057 duprintf("arp_tables: Translated table\n");
1055 1058
1056 ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo, 1059 ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1057 tmp.num_counters, tmp.counters); 1060 tmp.num_counters, tmp.counters);
1058 if (ret) 1061 if (ret)
1059 goto free_newinfo_untrans; 1062 goto free_newinfo_untrans;
@@ -1080,7 +1083,8 @@ static inline int add_counter_to_entry(struct arpt_entry *e,
1080 return 0; 1083 return 0;
1081} 1084}
1082 1085
1083static int do_add_counters(void __user *user, unsigned int len, int compat) 1086static int do_add_counters(struct net *net, void __user *user, unsigned int len,
1087 int compat)
1084{ 1088{
1085 unsigned int i; 1089 unsigned int i;
1086 struct xt_counters_info tmp; 1090 struct xt_counters_info tmp;
@@ -1132,7 +1136,7 @@ static int do_add_counters(void __user *user, unsigned int len, int compat)
1132 goto free; 1136 goto free;
1133 } 1137 }
1134 1138
1135 t = xt_find_table_lock(NF_ARP, name); 1139 t = xt_find_table_lock(net, NF_ARP, name);
1136 if (!t || IS_ERR(t)) { 1140 if (!t || IS_ERR(t)) {
1137 ret = t ? PTR_ERR(t) : -ENOENT; 1141 ret = t ? PTR_ERR(t) : -ENOENT;
1138 goto free; 1142 goto free;
@@ -1435,7 +1439,8 @@ struct compat_arpt_replace {
1435 struct compat_arpt_entry entries[0]; 1439 struct compat_arpt_entry entries[0];
1436}; 1440};
1437 1441
1438static int compat_do_replace(void __user *user, unsigned int len) 1442static int compat_do_replace(struct net *net, void __user *user,
1443 unsigned int len)
1439{ 1444{
1440 int ret; 1445 int ret;
1441 struct compat_arpt_replace tmp; 1446 struct compat_arpt_replace tmp;
@@ -1471,7 +1476,7 @@ static int compat_do_replace(void __user *user, unsigned int len)
1471 1476
1472 duprintf("compat_do_replace: Translated table\n"); 1477 duprintf("compat_do_replace: Translated table\n");
1473 1478
1474 ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo, 1479 ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1475 tmp.num_counters, compat_ptr(tmp.counters)); 1480 tmp.num_counters, compat_ptr(tmp.counters));
1476 if (ret) 1481 if (ret)
1477 goto free_newinfo_untrans; 1482 goto free_newinfo_untrans;
@@ -1494,11 +1499,11 @@ static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user,
1494 1499
1495 switch (cmd) { 1500 switch (cmd) {
1496 case ARPT_SO_SET_REPLACE: 1501 case ARPT_SO_SET_REPLACE:
1497 ret = compat_do_replace(user, len); 1502 ret = compat_do_replace(sk->sk_net, user, len);
1498 break; 1503 break;
1499 1504
1500 case ARPT_SO_SET_ADD_COUNTERS: 1505 case ARPT_SO_SET_ADD_COUNTERS:
1501 ret = do_add_counters(user, len, 1); 1506 ret = do_add_counters(sk->sk_net, user, len, 1);
1502 break; 1507 break;
1503 1508
1504 default: 1509 default:
@@ -1584,7 +1589,8 @@ struct compat_arpt_get_entries {
1584 struct compat_arpt_entry entrytable[0]; 1589 struct compat_arpt_entry entrytable[0];
1585}; 1590};
1586 1591
1587static int compat_get_entries(struct compat_arpt_get_entries __user *uptr, 1592static int compat_get_entries(struct net *net,
1593 struct compat_arpt_get_entries __user *uptr,
1588 int *len) 1594 int *len)
1589{ 1595{
1590 int ret; 1596 int ret;
@@ -1604,7 +1610,7 @@ static int compat_get_entries(struct compat_arpt_get_entries __user *uptr,
1604 } 1610 }
1605 1611
1606 xt_compat_lock(NF_ARP); 1612 xt_compat_lock(NF_ARP);
1607 t = xt_find_table_lock(NF_ARP, get.name); 1613 t = xt_find_table_lock(net, NF_ARP, get.name);
1608 if (t && !IS_ERR(t)) { 1614 if (t && !IS_ERR(t)) {
1609 struct xt_table_info *private = t->private; 1615 struct xt_table_info *private = t->private;
1610 struct xt_table_info info; 1616 struct xt_table_info info;
@@ -1641,10 +1647,10 @@ static int compat_do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user,
1641 1647
1642 switch (cmd) { 1648 switch (cmd) {
1643 case ARPT_SO_GET_INFO: 1649 case ARPT_SO_GET_INFO:
1644 ret = get_info(user, len, 1); 1650 ret = get_info(sk->sk_net, user, len, 1);
1645 break; 1651 break;
1646 case ARPT_SO_GET_ENTRIES: 1652 case ARPT_SO_GET_ENTRIES:
1647 ret = compat_get_entries(user, len); 1653 ret = compat_get_entries(sk->sk_net, user, len);
1648 break; 1654 break;
1649 default: 1655 default:
1650 ret = do_arpt_get_ctl(sk, cmd, user, len); 1656 ret = do_arpt_get_ctl(sk, cmd, user, len);
@@ -1662,11 +1668,11 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned
1662 1668
1663 switch (cmd) { 1669 switch (cmd) {
1664 case ARPT_SO_SET_REPLACE: 1670 case ARPT_SO_SET_REPLACE:
1665 ret = do_replace(user, len); 1671 ret = do_replace(sk->sk_net, user, len);
1666 break; 1672 break;
1667 1673
1668 case ARPT_SO_SET_ADD_COUNTERS: 1674 case ARPT_SO_SET_ADD_COUNTERS:
1669 ret = do_add_counters(user, len, 0); 1675 ret = do_add_counters(sk->sk_net, user, len, 0);
1670 break; 1676 break;
1671 1677
1672 default: 1678 default:
@@ -1686,11 +1692,11 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
1686 1692
1687 switch (cmd) { 1693 switch (cmd) {
1688 case ARPT_SO_GET_INFO: 1694 case ARPT_SO_GET_INFO:
1689 ret = get_info(user, len, 0); 1695 ret = get_info(sk->sk_net, user, len, 0);
1690 break; 1696 break;
1691 1697
1692 case ARPT_SO_GET_ENTRIES: 1698 case ARPT_SO_GET_ENTRIES:
1693 ret = get_entries(user, len); 1699 ret = get_entries(sk->sk_net, user, len);
1694 break; 1700 break;
1695 1701
1696 case ARPT_SO_GET_REVISION_TARGET: { 1702 case ARPT_SO_GET_REVISION_TARGET: {
@@ -1719,19 +1725,21 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
1719 return ret; 1725 return ret;
1720} 1726}
1721 1727
1722int arpt_register_table(struct arpt_table *table, 1728struct arpt_table *arpt_register_table(struct net *net,
1723 const struct arpt_replace *repl) 1729 struct arpt_table *table,
1730 const struct arpt_replace *repl)
1724{ 1731{
1725 int ret; 1732 int ret;
1726 struct xt_table_info *newinfo; 1733 struct xt_table_info *newinfo;
1727 struct xt_table_info bootstrap 1734 struct xt_table_info bootstrap
1728 = { 0, 0, 0, { 0 }, { 0 }, { } }; 1735 = { 0, 0, 0, { 0 }, { 0 }, { } };
1729 void *loc_cpu_entry; 1736 void *loc_cpu_entry;
1737 struct xt_table *new_table;
1730 1738
1731 newinfo = xt_alloc_table_info(repl->size); 1739 newinfo = xt_alloc_table_info(repl->size);
1732 if (!newinfo) { 1740 if (!newinfo) {
1733 ret = -ENOMEM; 1741 ret = -ENOMEM;
1734 return ret; 1742 goto out;
1735 } 1743 }
1736 1744
1737 /* choose the copy on our node/cpu */ 1745 /* choose the copy on our node/cpu */
@@ -1745,24 +1753,27 @@ int arpt_register_table(struct arpt_table *table,
1745 repl->underflow); 1753 repl->underflow);
1746 1754
1747 duprintf("arpt_register_table: translate table gives %d\n", ret); 1755 duprintf("arpt_register_table: translate table gives %d\n", ret);
1748 if (ret != 0) { 1756 if (ret != 0)
1749 xt_free_table_info(newinfo); 1757 goto out_free;
1750 return ret;
1751 }
1752 1758
1753 ret = xt_register_table(table, &bootstrap, newinfo); 1759 new_table = xt_register_table(net, table, &bootstrap, newinfo);
1754 if (ret != 0) { 1760 if (IS_ERR(new_table)) {
1755 xt_free_table_info(newinfo); 1761 ret = PTR_ERR(new_table);
1756 return ret; 1762 goto out_free;
1757 } 1763 }
1764 return new_table;
1758 1765
1759 return 0; 1766out_free:
1767 xt_free_table_info(newinfo);
1768out:
1769 return ERR_PTR(ret);
1760} 1770}
1761 1771
1762void arpt_unregister_table(struct arpt_table *table) 1772void arpt_unregister_table(struct arpt_table *table)
1763{ 1773{
1764 struct xt_table_info *private; 1774 struct xt_table_info *private;
1765 void *loc_cpu_entry; 1775 void *loc_cpu_entry;
1776 struct module *table_owner = table->me;
1766 1777
1767 private = xt_unregister_table(table); 1778 private = xt_unregister_table(table);
1768 1779
@@ -1770,6 +1781,8 @@ void arpt_unregister_table(struct arpt_table *table)
1770 loc_cpu_entry = private->entries[raw_smp_processor_id()]; 1781 loc_cpu_entry = private->entries[raw_smp_processor_id()];
1771 ARPT_ENTRY_ITERATE(loc_cpu_entry, private->size, 1782 ARPT_ENTRY_ITERATE(loc_cpu_entry, private->size,
1772 cleanup_entry, NULL); 1783 cleanup_entry, NULL);
1784 if (private->number > private->initial_entries)
1785 module_put(table_owner);
1773 xt_free_table_info(private); 1786 xt_free_table_info(private);
1774} 1787}
1775 1788
@@ -1809,11 +1822,26 @@ static struct nf_sockopt_ops arpt_sockopts = {
1809 .owner = THIS_MODULE, 1822 .owner = THIS_MODULE,
1810}; 1823};
1811 1824
1825static int __net_init arp_tables_net_init(struct net *net)
1826{
1827 return xt_proto_init(net, NF_ARP);
1828}
1829
1830static void __net_exit arp_tables_net_exit(struct net *net)
1831{
1832 xt_proto_fini(net, NF_ARP);
1833}
1834
1835static struct pernet_operations arp_tables_net_ops = {
1836 .init = arp_tables_net_init,
1837 .exit = arp_tables_net_exit,
1838};
1839
1812static int __init arp_tables_init(void) 1840static int __init arp_tables_init(void)
1813{ 1841{
1814 int ret; 1842 int ret;
1815 1843
1816 ret = xt_proto_init(NF_ARP); 1844 ret = register_pernet_subsys(&arp_tables_net_ops);
1817 if (ret < 0) 1845 if (ret < 0)
1818 goto err1; 1846 goto err1;
1819 1847
@@ -1838,7 +1866,7 @@ err4:
1838err3: 1866err3:
1839 xt_unregister_target(&arpt_standard_target); 1867 xt_unregister_target(&arpt_standard_target);
1840err2: 1868err2:
1841 xt_proto_fini(NF_ARP); 1869 unregister_pernet_subsys(&arp_tables_net_ops);
1842err1: 1870err1:
1843 return ret; 1871 return ret;
1844} 1872}
@@ -1848,7 +1876,7 @@ static void __exit arp_tables_fini(void)
1848 nf_unregister_sockopt(&arpt_sockopts); 1876 nf_unregister_sockopt(&arpt_sockopts);
1849 xt_unregister_target(&arpt_error_target); 1877 xt_unregister_target(&arpt_error_target);
1850 xt_unregister_target(&arpt_standard_target); 1878 xt_unregister_target(&arpt_standard_target);
1851 xt_proto_fini(NF_ARP); 1879 unregister_pernet_subsys(&arp_tables_net_ops);
1852} 1880}
1853 1881
1854EXPORT_SYMBOL(arpt_register_table); 1882EXPORT_SYMBOL(arpt_register_table);
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 7201511d54d2..4e9c496a30c2 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -20,7 +20,7 @@ static struct
20 struct arpt_replace repl; 20 struct arpt_replace repl;
21 struct arpt_standard entries[3]; 21 struct arpt_standard entries[3];
22 struct arpt_error term; 22 struct arpt_error term;
23} initial_table __initdata = { 23} initial_table __net_initdata = {
24 .repl = { 24 .repl = {
25 .name = "filter", 25 .name = "filter",
26 .valid_hooks = FILTER_VALID_HOOKS, 26 .valid_hooks = FILTER_VALID_HOOKS,
@@ -61,7 +61,7 @@ static unsigned int arpt_hook(unsigned int hook,
61 const struct net_device *out, 61 const struct net_device *out,
62 int (*okfn)(struct sk_buff *)) 62 int (*okfn)(struct sk_buff *))
63{ 63{
64 return arpt_do_table(skb, hook, in, out, &packet_filter); 64 return arpt_do_table(skb, hook, in, out, init_net.ipv4.arptable_filter);
65} 65}
66 66
67static struct nf_hook_ops arpt_ops[] __read_mostly = { 67static struct nf_hook_ops arpt_ops[] __read_mostly = {
@@ -85,12 +85,31 @@ static struct nf_hook_ops arpt_ops[] __read_mostly = {
85 }, 85 },
86}; 86};
87 87
88static int __net_init arptable_filter_net_init(struct net *net)
89{
90 /* Register table */
91 net->ipv4.arptable_filter =
92 arpt_register_table(net, &packet_filter, &initial_table.repl);
93 if (IS_ERR(net->ipv4.arptable_filter))
94 return PTR_ERR(net->ipv4.arptable_filter);
95 return 0;
96}
97
98static void __net_exit arptable_filter_net_exit(struct net *net)
99{
100 arpt_unregister_table(net->ipv4.arptable_filter);
101}
102
103static struct pernet_operations arptable_filter_net_ops = {
104 .init = arptable_filter_net_init,
105 .exit = arptable_filter_net_exit,
106};
107
88static int __init arptable_filter_init(void) 108static int __init arptable_filter_init(void)
89{ 109{
90 int ret; 110 int ret;
91 111
92 /* Register table */ 112 ret = register_pernet_subsys(&arptable_filter_net_ops);
93 ret = arpt_register_table(&packet_filter, &initial_table.repl);
94 if (ret < 0) 113 if (ret < 0)
95 return ret; 114 return ret;
96 115
@@ -100,14 +119,14 @@ static int __init arptable_filter_init(void)
100 return ret; 119 return ret;
101 120
102cleanup_table: 121cleanup_table:
103 arpt_unregister_table(&packet_filter); 122 unregister_pernet_subsys(&arptable_filter_net_ops);
104 return ret; 123 return ret;
105} 124}
106 125
107static void __exit arptable_filter_fini(void) 126static void __exit arptable_filter_fini(void)
108{ 127{
109 nf_unregister_hooks(arpt_ops, ARRAY_SIZE(arpt_ops)); 128 nf_unregister_hooks(arpt_ops, ARRAY_SIZE(arpt_ops));
110 arpt_unregister_table(&packet_filter); 129 unregister_pernet_subsys(&arptable_filter_net_ops);
111} 130}
112 131
113module_init(arptable_filter_init); 132module_init(arptable_filter_init);
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 5109839da222..6bda1102851b 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -512,6 +512,7 @@ static struct notifier_block ipq_nl_notifier = {
512 .notifier_call = ipq_rcv_nl_event, 512 .notifier_call = ipq_rcv_nl_event,
513}; 513};
514 514
515#ifdef CONFIG_SYSCTL
515static struct ctl_table_header *ipq_sysctl_header; 516static struct ctl_table_header *ipq_sysctl_header;
516 517
517static ctl_table ipq_table[] = { 518static ctl_table ipq_table[] = {
@@ -525,7 +526,9 @@ static ctl_table ipq_table[] = {
525 }, 526 },
526 { .ctl_name = 0 } 527 { .ctl_name = 0 }
527}; 528};
529#endif
528 530
531#ifdef CONFIG_PROC_FS
529static int ip_queue_show(struct seq_file *m, void *v) 532static int ip_queue_show(struct seq_file *m, void *v)
530{ 533{
531 read_lock_bh(&queue_lock); 534 read_lock_bh(&queue_lock);
@@ -562,6 +565,7 @@ static const struct file_operations ip_queue_proc_fops = {
562 .release = single_release, 565 .release = single_release,
563 .owner = THIS_MODULE, 566 .owner = THIS_MODULE,
564}; 567};
568#endif
565 569
566static const struct nf_queue_handler nfqh = { 570static const struct nf_queue_handler nfqh = {
567 .name = "ip_queue", 571 .name = "ip_queue",
@@ -571,7 +575,7 @@ static const struct nf_queue_handler nfqh = {
571static int __init ip_queue_init(void) 575static int __init ip_queue_init(void)
572{ 576{
573 int status = -ENOMEM; 577 int status = -ENOMEM;
574 struct proc_dir_entry *proc; 578 struct proc_dir_entry *proc __maybe_unused;
575 579
576 netlink_register_notifier(&ipq_nl_notifier); 580 netlink_register_notifier(&ipq_nl_notifier);
577 ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0, 581 ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0,
@@ -581,6 +585,7 @@ static int __init ip_queue_init(void)
581 goto cleanup_netlink_notifier; 585 goto cleanup_netlink_notifier;
582 } 586 }
583 587
588#ifdef CONFIG_PROC_FS
584 proc = create_proc_entry(IPQ_PROC_FS_NAME, 0, init_net.proc_net); 589 proc = create_proc_entry(IPQ_PROC_FS_NAME, 0, init_net.proc_net);
585 if (proc) { 590 if (proc) {
586 proc->owner = THIS_MODULE; 591 proc->owner = THIS_MODULE;
@@ -589,10 +594,11 @@ static int __init ip_queue_init(void)
589 printk(KERN_ERR "ip_queue: failed to create proc entry\n"); 594 printk(KERN_ERR "ip_queue: failed to create proc entry\n");
590 goto cleanup_ipqnl; 595 goto cleanup_ipqnl;
591 } 596 }
592 597#endif
593 register_netdevice_notifier(&ipq_dev_notifier); 598 register_netdevice_notifier(&ipq_dev_notifier);
599#ifdef CONFIG_SYSCTL
594 ipq_sysctl_header = register_sysctl_paths(net_ipv4_ctl_path, ipq_table); 600 ipq_sysctl_header = register_sysctl_paths(net_ipv4_ctl_path, ipq_table);
595 601#endif
596 status = nf_register_queue_handler(PF_INET, &nfqh); 602 status = nf_register_queue_handler(PF_INET, &nfqh);
597 if (status < 0) { 603 if (status < 0) {
598 printk(KERN_ERR "ip_queue: failed to register queue handler\n"); 604 printk(KERN_ERR "ip_queue: failed to register queue handler\n");
@@ -601,10 +607,12 @@ static int __init ip_queue_init(void)
601 return status; 607 return status;
602 608
603cleanup_sysctl: 609cleanup_sysctl:
610#ifdef CONFIG_SYSCTL
604 unregister_sysctl_table(ipq_sysctl_header); 611 unregister_sysctl_table(ipq_sysctl_header);
612#endif
605 unregister_netdevice_notifier(&ipq_dev_notifier); 613 unregister_netdevice_notifier(&ipq_dev_notifier);
606 proc_net_remove(&init_net, IPQ_PROC_FS_NAME); 614 proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
607cleanup_ipqnl: 615cleanup_ipqnl: __maybe_unused
608 netlink_kernel_release(ipqnl); 616 netlink_kernel_release(ipqnl);
609 mutex_lock(&ipqnl_mutex); 617 mutex_lock(&ipqnl_mutex);
610 mutex_unlock(&ipqnl_mutex); 618 mutex_unlock(&ipqnl_mutex);
@@ -620,7 +628,9 @@ static void __exit ip_queue_fini(void)
620 synchronize_net(); 628 synchronize_net();
621 ipq_flush(NULL, 0); 629 ipq_flush(NULL, 0);
622 630
631#ifdef CONFIG_SYSCTL
623 unregister_sysctl_table(ipq_sysctl_header); 632 unregister_sysctl_table(ipq_sysctl_header);
633#endif
624 unregister_netdevice_notifier(&ipq_dev_notifier); 634 unregister_netdevice_notifier(&ipq_dev_notifier);
625 proc_net_remove(&init_net, IPQ_PROC_FS_NAME); 635 proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
626 636
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 982b7f986291..600737f122d2 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -291,7 +291,7 @@ static void trace_packet(struct sk_buff *skb,
291 unsigned int hook, 291 unsigned int hook,
292 const struct net_device *in, 292 const struct net_device *in,
293 const struct net_device *out, 293 const struct net_device *out,
294 char *tablename, 294 const char *tablename,
295 struct xt_table_info *private, 295 struct xt_table_info *private,
296 struct ipt_entry *e) 296 struct ipt_entry *e)
297{ 297{
@@ -1092,7 +1092,7 @@ static int compat_table_info(const struct xt_table_info *info,
1092} 1092}
1093#endif 1093#endif
1094 1094
1095static int get_info(void __user *user, int *len, int compat) 1095static int get_info(struct net *net, void __user *user, int *len, int compat)
1096{ 1096{
1097 char name[IPT_TABLE_MAXNAMELEN]; 1097 char name[IPT_TABLE_MAXNAMELEN];
1098 struct xt_table *t; 1098 struct xt_table *t;
@@ -1112,7 +1112,7 @@ static int get_info(void __user *user, int *len, int compat)
1112 if (compat) 1112 if (compat)
1113 xt_compat_lock(AF_INET); 1113 xt_compat_lock(AF_INET);
1114#endif 1114#endif
1115 t = try_then_request_module(xt_find_table_lock(AF_INET, name), 1115 t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
1116 "iptable_%s", name); 1116 "iptable_%s", name);
1117 if (t && !IS_ERR(t)) { 1117 if (t && !IS_ERR(t)) {
1118 struct ipt_getinfo info; 1118 struct ipt_getinfo info;
@@ -1152,7 +1152,7 @@ static int get_info(void __user *user, int *len, int compat)
1152} 1152}
1153 1153
1154static int 1154static int
1155get_entries(struct ipt_get_entries __user *uptr, int *len) 1155get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len)
1156{ 1156{
1157 int ret; 1157 int ret;
1158 struct ipt_get_entries get; 1158 struct ipt_get_entries get;
@@ -1170,7 +1170,7 @@ get_entries(struct ipt_get_entries __user *uptr, int *len)
1170 return -EINVAL; 1170 return -EINVAL;
1171 } 1171 }
1172 1172
1173 t = xt_find_table_lock(AF_INET, get.name); 1173 t = xt_find_table_lock(net, AF_INET, get.name);
1174 if (t && !IS_ERR(t)) { 1174 if (t && !IS_ERR(t)) {
1175 struct xt_table_info *private = t->private; 1175 struct xt_table_info *private = t->private;
1176 duprintf("t->private->number = %u\n", private->number); 1176 duprintf("t->private->number = %u\n", private->number);
@@ -1191,7 +1191,7 @@ get_entries(struct ipt_get_entries __user *uptr, int *len)
1191} 1191}
1192 1192
1193static int 1193static int
1194__do_replace(const char *name, unsigned int valid_hooks, 1194__do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1195 struct xt_table_info *newinfo, unsigned int num_counters, 1195 struct xt_table_info *newinfo, unsigned int num_counters,
1196 void __user *counters_ptr) 1196 void __user *counters_ptr)
1197{ 1197{
@@ -1208,7 +1208,7 @@ __do_replace(const char *name, unsigned int valid_hooks,
1208 goto out; 1208 goto out;
1209 } 1209 }
1210 1210
1211 t = try_then_request_module(xt_find_table_lock(AF_INET, name), 1211 t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
1212 "iptable_%s", name); 1212 "iptable_%s", name);
1213 if (!t || IS_ERR(t)) { 1213 if (!t || IS_ERR(t)) {
1214 ret = t ? PTR_ERR(t) : -ENOENT; 1214 ret = t ? PTR_ERR(t) : -ENOENT;
@@ -1261,7 +1261,7 @@ __do_replace(const char *name, unsigned int valid_hooks,
1261} 1261}
1262 1262
1263static int 1263static int
1264do_replace(void __user *user, unsigned int len) 1264do_replace(struct net *net, void __user *user, unsigned int len)
1265{ 1265{
1266 int ret; 1266 int ret;
1267 struct ipt_replace tmp; 1267 struct ipt_replace tmp;
@@ -1295,7 +1295,7 @@ do_replace(void __user *user, unsigned int len)
1295 1295
1296 duprintf("ip_tables: Translated table\n"); 1296 duprintf("ip_tables: Translated table\n");
1297 1297
1298 ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo, 1298 ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1299 tmp.num_counters, tmp.counters); 1299 tmp.num_counters, tmp.counters);
1300 if (ret) 1300 if (ret)
1301 goto free_newinfo_untrans; 1301 goto free_newinfo_untrans;
@@ -1331,7 +1331,7 @@ add_counter_to_entry(struct ipt_entry *e,
1331} 1331}
1332 1332
1333static int 1333static int
1334do_add_counters(void __user *user, unsigned int len, int compat) 1334do_add_counters(struct net *net, void __user *user, unsigned int len, int compat)
1335{ 1335{
1336 unsigned int i; 1336 unsigned int i;
1337 struct xt_counters_info tmp; 1337 struct xt_counters_info tmp;
@@ -1383,7 +1383,7 @@ do_add_counters(void __user *user, unsigned int len, int compat)
1383 goto free; 1383 goto free;
1384 } 1384 }
1385 1385
1386 t = xt_find_table_lock(AF_INET, name); 1386 t = xt_find_table_lock(net, AF_INET, name);
1387 if (!t || IS_ERR(t)) { 1387 if (!t || IS_ERR(t)) {
1388 ret = t ? PTR_ERR(t) : -ENOENT; 1388 ret = t ? PTR_ERR(t) : -ENOENT;
1389 goto free; 1389 goto free;
@@ -1429,7 +1429,7 @@ struct compat_ipt_replace {
1429 1429
1430static int 1430static int
1431compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, 1431compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
1432 compat_uint_t *size, struct xt_counters *counters, 1432 unsigned int *size, struct xt_counters *counters,
1433 unsigned int *i) 1433 unsigned int *i)
1434{ 1434{
1435 struct ipt_entry_target *t; 1435 struct ipt_entry_target *t;
@@ -1476,7 +1476,7 @@ compat_find_calc_match(struct ipt_entry_match *m,
1476 const char *name, 1476 const char *name,
1477 const struct ipt_ip *ip, 1477 const struct ipt_ip *ip,
1478 unsigned int hookmask, 1478 unsigned int hookmask,
1479 int *size, int *i) 1479 int *size, unsigned int *i)
1480{ 1480{
1481 struct xt_match *match; 1481 struct xt_match *match;
1482 1482
@@ -1534,7 +1534,8 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
1534 struct ipt_entry_target *t; 1534 struct ipt_entry_target *t;
1535 struct xt_target *target; 1535 struct xt_target *target;
1536 unsigned int entry_offset; 1536 unsigned int entry_offset;
1537 int ret, off, h, j; 1537 unsigned int j;
1538 int ret, off, h;
1538 1539
1539 duprintf("check_compat_entry_size_and_hooks %p\n", e); 1540 duprintf("check_compat_entry_size_and_hooks %p\n", e);
1540 if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 1541 if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0
@@ -1647,7 +1648,8 @@ static int
1647compat_check_entry(struct ipt_entry *e, const char *name, 1648compat_check_entry(struct ipt_entry *e, const char *name,
1648 unsigned int *i) 1649 unsigned int *i)
1649{ 1650{
1650 int j, ret; 1651 unsigned int j;
1652 int ret;
1651 1653
1652 j = 0; 1654 j = 0;
1653 ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, 1655 ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip,
@@ -1789,7 +1791,7 @@ out_unlock:
1789} 1791}
1790 1792
1791static int 1793static int
1792compat_do_replace(void __user *user, unsigned int len) 1794compat_do_replace(struct net *net, void __user *user, unsigned int len)
1793{ 1795{
1794 int ret; 1796 int ret;
1795 struct compat_ipt_replace tmp; 1797 struct compat_ipt_replace tmp;
@@ -1826,7 +1828,7 @@ compat_do_replace(void __user *user, unsigned int len)
1826 1828
1827 duprintf("compat_do_replace: Translated table\n"); 1829 duprintf("compat_do_replace: Translated table\n");
1828 1830
1829 ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo, 1831 ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1830 tmp.num_counters, compat_ptr(tmp.counters)); 1832 tmp.num_counters, compat_ptr(tmp.counters));
1831 if (ret) 1833 if (ret)
1832 goto free_newinfo_untrans; 1834 goto free_newinfo_untrans;
@@ -1850,11 +1852,11 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user,
1850 1852
1851 switch (cmd) { 1853 switch (cmd) {
1852 case IPT_SO_SET_REPLACE: 1854 case IPT_SO_SET_REPLACE:
1853 ret = compat_do_replace(user, len); 1855 ret = compat_do_replace(sk->sk_net, user, len);
1854 break; 1856 break;
1855 1857
1856 case IPT_SO_SET_ADD_COUNTERS: 1858 case IPT_SO_SET_ADD_COUNTERS:
1857 ret = do_add_counters(user, len, 1); 1859 ret = do_add_counters(sk->sk_net, user, len, 1);
1858 break; 1860 break;
1859 1861
1860 default: 1862 default:
@@ -1903,7 +1905,8 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
1903} 1905}
1904 1906
1905static int 1907static int
1906compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len) 1908compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
1909 int *len)
1907{ 1910{
1908 int ret; 1911 int ret;
1909 struct compat_ipt_get_entries get; 1912 struct compat_ipt_get_entries get;
@@ -1924,7 +1927,7 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len)
1924 } 1927 }
1925 1928
1926 xt_compat_lock(AF_INET); 1929 xt_compat_lock(AF_INET);
1927 t = xt_find_table_lock(AF_INET, get.name); 1930 t = xt_find_table_lock(net, AF_INET, get.name);
1928 if (t && !IS_ERR(t)) { 1931 if (t && !IS_ERR(t)) {
1929 struct xt_table_info *private = t->private; 1932 struct xt_table_info *private = t->private;
1930 struct xt_table_info info; 1933 struct xt_table_info info;
@@ -1960,10 +1963,10 @@ compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1960 1963
1961 switch (cmd) { 1964 switch (cmd) {
1962 case IPT_SO_GET_INFO: 1965 case IPT_SO_GET_INFO:
1963 ret = get_info(user, len, 1); 1966 ret = get_info(sk->sk_net, user, len, 1);
1964 break; 1967 break;
1965 case IPT_SO_GET_ENTRIES: 1968 case IPT_SO_GET_ENTRIES:
1966 ret = compat_get_entries(user, len); 1969 ret = compat_get_entries(sk->sk_net, user, len);
1967 break; 1970 break;
1968 default: 1971 default:
1969 ret = do_ipt_get_ctl(sk, cmd, user, len); 1972 ret = do_ipt_get_ctl(sk, cmd, user, len);
@@ -1982,11 +1985,11 @@ do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1982 1985
1983 switch (cmd) { 1986 switch (cmd) {
1984 case IPT_SO_SET_REPLACE: 1987 case IPT_SO_SET_REPLACE:
1985 ret = do_replace(user, len); 1988 ret = do_replace(sk->sk_net, user, len);
1986 break; 1989 break;
1987 1990
1988 case IPT_SO_SET_ADD_COUNTERS: 1991 case IPT_SO_SET_ADD_COUNTERS:
1989 ret = do_add_counters(user, len, 0); 1992 ret = do_add_counters(sk->sk_net, user, len, 0);
1990 break; 1993 break;
1991 1994
1992 default: 1995 default:
@@ -2007,11 +2010,11 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2007 2010
2008 switch (cmd) { 2011 switch (cmd) {
2009 case IPT_SO_GET_INFO: 2012 case IPT_SO_GET_INFO:
2010 ret = get_info(user, len, 0); 2013 ret = get_info(sk->sk_net, user, len, 0);
2011 break; 2014 break;
2012 2015
2013 case IPT_SO_GET_ENTRIES: 2016 case IPT_SO_GET_ENTRIES:
2014 ret = get_entries(user, len); 2017 ret = get_entries(sk->sk_net, user, len);
2015 break; 2018 break;
2016 2019
2017 case IPT_SO_GET_REVISION_MATCH: 2020 case IPT_SO_GET_REVISION_MATCH:
@@ -2048,17 +2051,21 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2048 return ret; 2051 return ret;
2049} 2052}
2050 2053
2051int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl) 2054struct xt_table *ipt_register_table(struct net *net, struct xt_table *table,
2055 const struct ipt_replace *repl)
2052{ 2056{
2053 int ret; 2057 int ret;
2054 struct xt_table_info *newinfo; 2058 struct xt_table_info *newinfo;
2055 struct xt_table_info bootstrap 2059 struct xt_table_info bootstrap
2056 = { 0, 0, 0, { 0 }, { 0 }, { } }; 2060 = { 0, 0, 0, { 0 }, { 0 }, { } };
2057 void *loc_cpu_entry; 2061 void *loc_cpu_entry;
2062 struct xt_table *new_table;
2058 2063
2059 newinfo = xt_alloc_table_info(repl->size); 2064 newinfo = xt_alloc_table_info(repl->size);
2060 if (!newinfo) 2065 if (!newinfo) {
2061 return -ENOMEM; 2066 ret = -ENOMEM;
2067 goto out;
2068 }
2062 2069
2063 /* choose the copy on our node/cpu, but dont care about preemption */ 2070 /* choose the copy on our node/cpu, but dont care about preemption */
2064 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; 2071 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
@@ -2069,30 +2076,36 @@ int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
2069 repl->num_entries, 2076 repl->num_entries,
2070 repl->hook_entry, 2077 repl->hook_entry,
2071 repl->underflow); 2078 repl->underflow);
2072 if (ret != 0) { 2079 if (ret != 0)
2073 xt_free_table_info(newinfo); 2080 goto out_free;
2074 return ret;
2075 }
2076 2081
2077 ret = xt_register_table(table, &bootstrap, newinfo); 2082 new_table = xt_register_table(net, table, &bootstrap, newinfo);
2078 if (ret != 0) { 2083 if (IS_ERR(new_table)) {
2079 xt_free_table_info(newinfo); 2084 ret = PTR_ERR(new_table);
2080 return ret; 2085 goto out_free;
2081 } 2086 }
2082 2087
2083 return 0; 2088 return new_table;
2089
2090out_free:
2091 xt_free_table_info(newinfo);
2092out:
2093 return ERR_PTR(ret);
2084} 2094}
2085 2095
2086void ipt_unregister_table(struct xt_table *table) 2096void ipt_unregister_table(struct xt_table *table)
2087{ 2097{
2088 struct xt_table_info *private; 2098 struct xt_table_info *private;
2089 void *loc_cpu_entry; 2099 void *loc_cpu_entry;
2100 struct module *table_owner = table->me;
2090 2101
2091 private = xt_unregister_table(table); 2102 private = xt_unregister_table(table);
2092 2103
2093 /* Decrease module usage counts and free resources */ 2104 /* Decrease module usage counts and free resources */
2094 loc_cpu_entry = private->entries[raw_smp_processor_id()]; 2105 loc_cpu_entry = private->entries[raw_smp_processor_id()];
2095 IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL); 2106 IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
2107 if (private->number > private->initial_entries)
2108 module_put(table_owner);
2096 xt_free_table_info(private); 2109 xt_free_table_info(private);
2097} 2110}
2098 2111
@@ -2200,11 +2213,26 @@ static struct xt_match icmp_matchstruct __read_mostly = {
2200 .family = AF_INET, 2213 .family = AF_INET,
2201}; 2214};
2202 2215
2216static int __net_init ip_tables_net_init(struct net *net)
2217{
2218 return xt_proto_init(net, AF_INET);
2219}
2220
2221static void __net_exit ip_tables_net_exit(struct net *net)
2222{
2223 xt_proto_fini(net, AF_INET);
2224}
2225
2226static struct pernet_operations ip_tables_net_ops = {
2227 .init = ip_tables_net_init,
2228 .exit = ip_tables_net_exit,
2229};
2230
2203static int __init ip_tables_init(void) 2231static int __init ip_tables_init(void)
2204{ 2232{
2205 int ret; 2233 int ret;
2206 2234
2207 ret = xt_proto_init(AF_INET); 2235 ret = register_pernet_subsys(&ip_tables_net_ops);
2208 if (ret < 0) 2236 if (ret < 0)
2209 goto err1; 2237 goto err1;
2210 2238
@@ -2234,7 +2262,7 @@ err4:
2234err3: 2262err3:
2235 xt_unregister_target(&ipt_standard_target); 2263 xt_unregister_target(&ipt_standard_target);
2236err2: 2264err2:
2237 xt_proto_fini(AF_INET); 2265 unregister_pernet_subsys(&ip_tables_net_ops);
2238err1: 2266err1:
2239 return ret; 2267 return ret;
2240} 2268}
@@ -2247,7 +2275,7 @@ static void __exit ip_tables_fini(void)
2247 xt_unregister_target(&ipt_error_target); 2275 xt_unregister_target(&ipt_error_target);
2248 xt_unregister_target(&ipt_standard_target); 2276 xt_unregister_target(&ipt_standard_target);
2249 2277
2250 xt_proto_fini(AF_INET); 2278 unregister_pernet_subsys(&ip_tables_net_ops);
2251} 2279}
2252 2280
2253EXPORT_SYMBOL(ipt_register_table); 2281EXPORT_SYMBOL(ipt_register_table);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 1b31f7d14d46..c6cf84c77611 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -76,13 +76,6 @@ clusterip_config_put(struct clusterip_config *c)
76 kfree(c); 76 kfree(c);
77} 77}
78 78
79/* increase the count of entries(rules) using/referencing this config */
80static inline void
81clusterip_config_entry_get(struct clusterip_config *c)
82{
83 atomic_inc(&c->entries);
84}
85
86/* decrease the count of entries using/referencing this config. If last 79/* decrease the count of entries using/referencing this config. If last
87 * entry(rule) is removed, remove the config from lists, but don't free it 80 * entry(rule) is removed, remove the config from lists, but don't free it
88 * yet, since proc-files could still be holding references */ 81 * yet, since proc-files could still be holding references */
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index e3154a99c08a..68cbe3ca01ce 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -212,11 +212,11 @@ recent_mt(const struct sk_buff *skb, const struct net_device *in,
212 recent_entry_remove(t, e); 212 recent_entry_remove(t, e);
213 ret = !ret; 213 ret = !ret;
214 } else if (info->check_set & (IPT_RECENT_CHECK | IPT_RECENT_UPDATE)) { 214 } else if (info->check_set & (IPT_RECENT_CHECK | IPT_RECENT_UPDATE)) {
215 unsigned long t = jiffies - info->seconds * HZ; 215 unsigned long time = jiffies - info->seconds * HZ;
216 unsigned int i, hits = 0; 216 unsigned int i, hits = 0;
217 217
218 for (i = 0; i < e->nstamps; i++) { 218 for (i = 0; i < e->nstamps; i++) {
219 if (info->seconds && time_after(t, e->stamps[i])) 219 if (info->seconds && time_after(time, e->stamps[i]))
220 continue; 220 continue;
221 if (++hits >= info->hit_count) { 221 if (++hits >= info->hit_count) {
222 ret = !ret; 222 ret = !ret;
@@ -320,6 +320,7 @@ struct recent_iter_state {
320}; 320};
321 321
322static void *recent_seq_start(struct seq_file *seq, loff_t *pos) 322static void *recent_seq_start(struct seq_file *seq, loff_t *pos)
323 __acquires(recent_lock)
323{ 324{
324 struct recent_iter_state *st = seq->private; 325 struct recent_iter_state *st = seq->private;
325 const struct recent_table *t = st->table; 326 const struct recent_table *t = st->table;
@@ -352,6 +353,7 @@ static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos)
352} 353}
353 354
354static void recent_seq_stop(struct seq_file *s, void *v) 355static void recent_seq_stop(struct seq_file *s, void *v)
356 __releases(recent_lock)
355{ 357{
356 spin_unlock_bh(&recent_lock); 358 spin_unlock_bh(&recent_lock);
357} 359}
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 29bb4f9fbda0..69f3d7e6e96f 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -28,7 +28,7 @@ static struct
28 struct ipt_replace repl; 28 struct ipt_replace repl;
29 struct ipt_standard entries[3]; 29 struct ipt_standard entries[3];
30 struct ipt_error term; 30 struct ipt_error term;
31} initial_table __initdata = { 31} initial_table __net_initdata = {
32 .repl = { 32 .repl = {
33 .name = "filter", 33 .name = "filter",
34 .valid_hooks = FILTER_VALID_HOOKS, 34 .valid_hooks = FILTER_VALID_HOOKS,
@@ -69,7 +69,7 @@ ipt_hook(unsigned int hook,
69 const struct net_device *out, 69 const struct net_device *out,
70 int (*okfn)(struct sk_buff *)) 70 int (*okfn)(struct sk_buff *))
71{ 71{
72 return ipt_do_table(skb, hook, in, out, &packet_filter); 72 return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_filter);
73} 73}
74 74
75static unsigned int 75static unsigned int
@@ -88,7 +88,7 @@ ipt_local_out_hook(unsigned int hook,
88 return NF_ACCEPT; 88 return NF_ACCEPT;
89 } 89 }
90 90
91 return ipt_do_table(skb, hook, in, out, &packet_filter); 91 return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_filter);
92} 92}
93 93
94static struct nf_hook_ops ipt_ops[] __read_mostly = { 94static struct nf_hook_ops ipt_ops[] __read_mostly = {
@@ -119,6 +119,26 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = {
119static int forward = NF_ACCEPT; 119static int forward = NF_ACCEPT;
120module_param(forward, bool, 0000); 120module_param(forward, bool, 0000);
121 121
122static int __net_init iptable_filter_net_init(struct net *net)
123{
124 /* Register table */
125 net->ipv4.iptable_filter =
126 ipt_register_table(net, &packet_filter, &initial_table.repl);
127 if (IS_ERR(net->ipv4.iptable_filter))
128 return PTR_ERR(net->ipv4.iptable_filter);
129 return 0;
130}
131
132static void __net_exit iptable_filter_net_exit(struct net *net)
133{
134 ipt_unregister_table(net->ipv4.iptable_filter);
135}
136
137static struct pernet_operations iptable_filter_net_ops = {
138 .init = iptable_filter_net_init,
139 .exit = iptable_filter_net_exit,
140};
141
122static int __init iptable_filter_init(void) 142static int __init iptable_filter_init(void)
123{ 143{
124 int ret; 144 int ret;
@@ -131,8 +151,7 @@ static int __init iptable_filter_init(void)
131 /* Entry 1 is the FORWARD hook */ 151 /* Entry 1 is the FORWARD hook */
132 initial_table.entries[1].target.verdict = -forward - 1; 152 initial_table.entries[1].target.verdict = -forward - 1;
133 153
134 /* Register table */ 154 ret = register_pernet_subsys(&iptable_filter_net_ops);
135 ret = ipt_register_table(&packet_filter, &initial_table.repl);
136 if (ret < 0) 155 if (ret < 0)
137 return ret; 156 return ret;
138 157
@@ -144,14 +163,14 @@ static int __init iptable_filter_init(void)
144 return ret; 163 return ret;
145 164
146 cleanup_table: 165 cleanup_table:
147 ipt_unregister_table(&packet_filter); 166 unregister_pernet_subsys(&iptable_filter_net_ops);
148 return ret; 167 return ret;
149} 168}
150 169
151static void __exit iptable_filter_fini(void) 170static void __exit iptable_filter_fini(void)
152{ 171{
153 nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); 172 nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
154 ipt_unregister_table(&packet_filter); 173 unregister_pernet_subsys(&iptable_filter_net_ops);
155} 174}
156 175
157module_init(iptable_filter_init); 176module_init(iptable_filter_init);
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 5c4be202430c..c55a210853a7 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -33,7 +33,7 @@ static struct
33 struct ipt_replace repl; 33 struct ipt_replace repl;
34 struct ipt_standard entries[5]; 34 struct ipt_standard entries[5];
35 struct ipt_error term; 35 struct ipt_error term;
36} initial_table __initdata = { 36} initial_table __net_initdata = {
37 .repl = { 37 .repl = {
38 .name = "mangle", 38 .name = "mangle",
39 .valid_hooks = MANGLE_VALID_HOOKS, 39 .valid_hooks = MANGLE_VALID_HOOKS,
@@ -80,7 +80,7 @@ ipt_route_hook(unsigned int hook,
80 const struct net_device *out, 80 const struct net_device *out,
81 int (*okfn)(struct sk_buff *)) 81 int (*okfn)(struct sk_buff *))
82{ 82{
83 return ipt_do_table(skb, hook, in, out, &packet_mangler); 83 return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_mangle);
84} 84}
85 85
86static unsigned int 86static unsigned int
@@ -112,7 +112,7 @@ ipt_local_hook(unsigned int hook,
112 daddr = iph->daddr; 112 daddr = iph->daddr;
113 tos = iph->tos; 113 tos = iph->tos;
114 114
115 ret = ipt_do_table(skb, hook, in, out, &packet_mangler); 115 ret = ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_mangle);
116 /* Reroute for ANY change. */ 116 /* Reroute for ANY change. */
117 if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { 117 if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) {
118 iph = ip_hdr(skb); 118 iph = ip_hdr(skb);
@@ -166,12 +166,31 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = {
166 }, 166 },
167}; 167};
168 168
169static int __net_init iptable_mangle_net_init(struct net *net)
170{
171 /* Register table */
172 net->ipv4.iptable_mangle =
173 ipt_register_table(net, &packet_mangler, &initial_table.repl);
174 if (IS_ERR(net->ipv4.iptable_mangle))
175 return PTR_ERR(net->ipv4.iptable_mangle);
176 return 0;
177}
178
179static void __net_exit iptable_mangle_net_exit(struct net *net)
180{
181 ipt_unregister_table(net->ipv4.iptable_mangle);
182}
183
184static struct pernet_operations iptable_mangle_net_ops = {
185 .init = iptable_mangle_net_init,
186 .exit = iptable_mangle_net_exit,
187};
188
169static int __init iptable_mangle_init(void) 189static int __init iptable_mangle_init(void)
170{ 190{
171 int ret; 191 int ret;
172 192
173 /* Register table */ 193 ret = register_pernet_subsys(&iptable_mangle_net_ops);
174 ret = ipt_register_table(&packet_mangler, &initial_table.repl);
175 if (ret < 0) 194 if (ret < 0)
176 return ret; 195 return ret;
177 196
@@ -183,14 +202,14 @@ static int __init iptable_mangle_init(void)
183 return ret; 202 return ret;
184 203
185 cleanup_table: 204 cleanup_table:
186 ipt_unregister_table(&packet_mangler); 205 unregister_pernet_subsys(&iptable_mangle_net_ops);
187 return ret; 206 return ret;
188} 207}
189 208
190static void __exit iptable_mangle_fini(void) 209static void __exit iptable_mangle_fini(void)
191{ 210{
192 nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); 211 nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
193 ipt_unregister_table(&packet_mangler); 212 unregister_pernet_subsys(&iptable_mangle_net_ops);
194} 213}
195 214
196module_init(iptable_mangle_init); 215module_init(iptable_mangle_init);
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index dc34aa274533..e41fe8ca4e1c 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -14,7 +14,7 @@ static struct
14 struct ipt_replace repl; 14 struct ipt_replace repl;
15 struct ipt_standard entries[2]; 15 struct ipt_standard entries[2];
16 struct ipt_error term; 16 struct ipt_error term;
17} initial_table __initdata = { 17} initial_table __net_initdata = {
18 .repl = { 18 .repl = {
19 .name = "raw", 19 .name = "raw",
20 .valid_hooks = RAW_VALID_HOOKS, 20 .valid_hooks = RAW_VALID_HOOKS,
@@ -52,7 +52,7 @@ ipt_hook(unsigned int hook,
52 const struct net_device *out, 52 const struct net_device *out,
53 int (*okfn)(struct sk_buff *)) 53 int (*okfn)(struct sk_buff *))
54{ 54{
55 return ipt_do_table(skb, hook, in, out, &packet_raw); 55 return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_raw);
56} 56}
57 57
58static unsigned int 58static unsigned int
@@ -70,7 +70,7 @@ ipt_local_hook(unsigned int hook,
70 "packet.\n"); 70 "packet.\n");
71 return NF_ACCEPT; 71 return NF_ACCEPT;
72 } 72 }
73 return ipt_do_table(skb, hook, in, out, &packet_raw); 73 return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_raw);
74} 74}
75 75
76/* 'raw' is the very first table. */ 76/* 'raw' is the very first table. */
@@ -91,12 +91,31 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = {
91 }, 91 },
92}; 92};
93 93
94static int __net_init iptable_raw_net_init(struct net *net)
95{
96 /* Register table */
97 net->ipv4.iptable_raw =
98 ipt_register_table(net, &packet_raw, &initial_table.repl);
99 if (IS_ERR(net->ipv4.iptable_raw))
100 return PTR_ERR(net->ipv4.iptable_raw);
101 return 0;
102}
103
104static void __net_exit iptable_raw_net_exit(struct net *net)
105{
106 ipt_unregister_table(net->ipv4.iptable_raw);
107}
108
109static struct pernet_operations iptable_raw_net_ops = {
110 .init = iptable_raw_net_init,
111 .exit = iptable_raw_net_exit,
112};
113
94static int __init iptable_raw_init(void) 114static int __init iptable_raw_init(void)
95{ 115{
96 int ret; 116 int ret;
97 117
98 /* Register table */ 118 ret = register_pernet_subsys(&iptable_raw_net_ops);
99 ret = ipt_register_table(&packet_raw, &initial_table.repl);
100 if (ret < 0) 119 if (ret < 0)
101 return ret; 120 return ret;
102 121
@@ -108,14 +127,14 @@ static int __init iptable_raw_init(void)
108 return ret; 127 return ret;
109 128
110 cleanup_table: 129 cleanup_table:
111 ipt_unregister_table(&packet_raw); 130 unregister_pernet_subsys(&iptable_raw_net_ops);
112 return ret; 131 return ret;
113} 132}
114 133
115static void __exit iptable_raw_fini(void) 134static void __exit iptable_raw_fini(void)
116{ 135{
117 nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); 136 nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
118 ipt_unregister_table(&packet_raw); 137 unregister_pernet_subsys(&iptable_raw_net_ops);
119} 138}
120 139
121module_init(iptable_raw_init); 140module_init(iptable_raw_init);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index ac3d61d8026e..a65b845c5f15 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -27,7 +27,8 @@
27static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, 27static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
28 struct nf_conntrack_tuple *tuple) 28 struct nf_conntrack_tuple *tuple)
29{ 29{
30 __be32 _addrs[2], *ap; 30 const __be32 *ap;
31 __be32 _addrs[2];
31 ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr), 32 ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr),
32 sizeof(u_int32_t) * 2, _addrs); 33 sizeof(u_int32_t) * 2, _addrs);
33 if (ap == NULL) 34 if (ap == NULL)
@@ -76,7 +77,8 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
76static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, 77static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
77 unsigned int *dataoff, u_int8_t *protonum) 78 unsigned int *dataoff, u_int8_t *protonum)
78{ 79{
79 struct iphdr _iph, *iph; 80 const struct iphdr *iph;
81 struct iphdr _iph;
80 82
81 iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); 83 iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
82 if (iph == NULL) 84 if (iph == NULL)
@@ -111,8 +113,8 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum,
111{ 113{
112 struct nf_conn *ct; 114 struct nf_conn *ct;
113 enum ip_conntrack_info ctinfo; 115 enum ip_conntrack_info ctinfo;
114 struct nf_conn_help *help; 116 const struct nf_conn_help *help;
115 struct nf_conntrack_helper *helper; 117 const struct nf_conntrack_helper *helper;
116 118
117 /* This is where we call the helper: as the packet goes out. */ 119 /* This is where we call the helper: as the packet goes out. */
118 ct = nf_ct_get(skb, &ctinfo); 120 ct = nf_ct_get(skb, &ctinfo);
@@ -299,8 +301,8 @@ static ctl_table ip_ct_sysctl_table[] = {
299static int 301static int
300getorigdst(struct sock *sk, int optval, void __user *user, int *len) 302getorigdst(struct sock *sk, int optval, void __user *user, int *len)
301{ 303{
302 struct inet_sock *inet = inet_sk(sk); 304 const struct inet_sock *inet = inet_sk(sk);
303 struct nf_conntrack_tuple_hash *h; 305 const struct nf_conntrack_tuple_hash *h;
304 struct nf_conntrack_tuple tuple; 306 struct nf_conntrack_tuple tuple;
305 307
306 NF_CT_TUPLE_U_BLANK(&tuple); 308 NF_CT_TUPLE_U_BLANK(&tuple);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 543c02b74c96..089252e82c01 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -39,12 +39,14 @@ struct ct_iter_state {
39static struct hlist_node *ct_get_first(struct seq_file *seq) 39static struct hlist_node *ct_get_first(struct seq_file *seq)
40{ 40{
41 struct ct_iter_state *st = seq->private; 41 struct ct_iter_state *st = seq->private;
42 struct hlist_node *n;
42 43
43 for (st->bucket = 0; 44 for (st->bucket = 0;
44 st->bucket < nf_conntrack_htable_size; 45 st->bucket < nf_conntrack_htable_size;
45 st->bucket++) { 46 st->bucket++) {
46 if (!hlist_empty(&nf_conntrack_hash[st->bucket])) 47 n = rcu_dereference(nf_conntrack_hash[st->bucket].first);
47 return nf_conntrack_hash[st->bucket].first; 48 if (n)
49 return n;
48 } 50 }
49 return NULL; 51 return NULL;
50} 52}
@@ -54,11 +56,11 @@ static struct hlist_node *ct_get_next(struct seq_file *seq,
54{ 56{
55 struct ct_iter_state *st = seq->private; 57 struct ct_iter_state *st = seq->private;
56 58
57 head = head->next; 59 head = rcu_dereference(head->next);
58 while (head == NULL) { 60 while (head == NULL) {
59 if (++st->bucket >= nf_conntrack_htable_size) 61 if (++st->bucket >= nf_conntrack_htable_size)
60 return NULL; 62 return NULL;
61 head = nf_conntrack_hash[st->bucket].first; 63 head = rcu_dereference(nf_conntrack_hash[st->bucket].first);
62 } 64 }
63 return head; 65 return head;
64} 66}
@@ -74,8 +76,9 @@ static struct hlist_node *ct_get_idx(struct seq_file *seq, loff_t pos)
74} 76}
75 77
76static void *ct_seq_start(struct seq_file *seq, loff_t *pos) 78static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
79 __acquires(RCU)
77{ 80{
78 read_lock_bh(&nf_conntrack_lock); 81 rcu_read_lock();
79 return ct_get_idx(seq, *pos); 82 return ct_get_idx(seq, *pos);
80} 83}
81 84
@@ -86,16 +89,17 @@ static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
86} 89}
87 90
88static void ct_seq_stop(struct seq_file *s, void *v) 91static void ct_seq_stop(struct seq_file *s, void *v)
92 __releases(RCU)
89{ 93{
90 read_unlock_bh(&nf_conntrack_lock); 94 rcu_read_unlock();
91} 95}
92 96
93static int ct_seq_show(struct seq_file *s, void *v) 97static int ct_seq_show(struct seq_file *s, void *v)
94{ 98{
95 const struct nf_conntrack_tuple_hash *hash = v; 99 const struct nf_conntrack_tuple_hash *hash = v;
96 const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); 100 const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
97 struct nf_conntrack_l3proto *l3proto; 101 const struct nf_conntrack_l3proto *l3proto;
98 struct nf_conntrack_l4proto *l4proto; 102 const struct nf_conntrack_l4proto *l4proto;
99 103
100 NF_CT_ASSERT(ct); 104 NF_CT_ASSERT(ct);
101 105
@@ -191,10 +195,12 @@ struct ct_expect_iter_state {
191static struct hlist_node *ct_expect_get_first(struct seq_file *seq) 195static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
192{ 196{
193 struct ct_expect_iter_state *st = seq->private; 197 struct ct_expect_iter_state *st = seq->private;
198 struct hlist_node *n;
194 199
195 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { 200 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
196 if (!hlist_empty(&nf_ct_expect_hash[st->bucket])) 201 n = rcu_dereference(nf_ct_expect_hash[st->bucket].first);
197 return nf_ct_expect_hash[st->bucket].first; 202 if (n)
203 return n;
198 } 204 }
199 return NULL; 205 return NULL;
200} 206}
@@ -204,11 +210,11 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
204{ 210{
205 struct ct_expect_iter_state *st = seq->private; 211 struct ct_expect_iter_state *st = seq->private;
206 212
207 head = head->next; 213 head = rcu_dereference(head->next);
208 while (head == NULL) { 214 while (head == NULL) {
209 if (++st->bucket >= nf_ct_expect_hsize) 215 if (++st->bucket >= nf_ct_expect_hsize)
210 return NULL; 216 return NULL;
211 head = nf_ct_expect_hash[st->bucket].first; 217 head = rcu_dereference(nf_ct_expect_hash[st->bucket].first);
212 } 218 }
213 return head; 219 return head;
214} 220}
@@ -224,8 +230,9 @@ static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
224} 230}
225 231
226static void *exp_seq_start(struct seq_file *seq, loff_t *pos) 232static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
233 __acquires(RCU)
227{ 234{
228 read_lock_bh(&nf_conntrack_lock); 235 rcu_read_lock();
229 return ct_expect_get_idx(seq, *pos); 236 return ct_expect_get_idx(seq, *pos);
230} 237}
231 238
@@ -236,14 +243,15 @@ static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
236} 243}
237 244
238static void exp_seq_stop(struct seq_file *seq, void *v) 245static void exp_seq_stop(struct seq_file *seq, void *v)
246 __releases(RCU)
239{ 247{
240 read_unlock_bh(&nf_conntrack_lock); 248 rcu_read_unlock();
241} 249}
242 250
243static int exp_seq_show(struct seq_file *s, void *v) 251static int exp_seq_show(struct seq_file *s, void *v)
244{ 252{
245 struct nf_conntrack_expect *exp; 253 struct nf_conntrack_expect *exp;
246 struct hlist_node *n = v; 254 const struct hlist_node *n = v;
247 255
248 exp = hlist_entry(n, struct nf_conntrack_expect, hnode); 256 exp = hlist_entry(n, struct nf_conntrack_expect, hnode);
249 257
@@ -324,7 +332,7 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
324static int ct_cpu_seq_show(struct seq_file *seq, void *v) 332static int ct_cpu_seq_show(struct seq_file *seq, void *v)
325{ 333{
326 unsigned int nr_conntracks = atomic_read(&nf_conntrack_count); 334 unsigned int nr_conntracks = atomic_read(&nf_conntrack_count);
327 struct ip_conntrack_stat *st = v; 335 const struct ip_conntrack_stat *st = v;
328 336
329 if (v == SEQ_START_TOKEN) { 337 if (v == SEQ_START_TOKEN) {
330 seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n"); 338 seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n");
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 4004a04c5510..6873fddb3529 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -26,7 +26,8 @@ static int icmp_pkt_to_tuple(const struct sk_buff *skb,
26 unsigned int dataoff, 26 unsigned int dataoff,
27 struct nf_conntrack_tuple *tuple) 27 struct nf_conntrack_tuple *tuple)
28{ 28{
29 struct icmphdr _hdr, *hp; 29 const struct icmphdr *hp;
30 struct icmphdr _hdr;
30 31
31 hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); 32 hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
32 if (hp == NULL) 33 if (hp == NULL)
@@ -100,7 +101,7 @@ static int icmp_packet(struct nf_conn *ct,
100} 101}
101 102
102/* Called when a new connection for this protocol found. */ 103/* Called when a new connection for this protocol found. */
103static int icmp_new(struct nf_conn *conntrack, 104static int icmp_new(struct nf_conn *ct,
104 const struct sk_buff *skb, unsigned int dataoff) 105 const struct sk_buff *skb, unsigned int dataoff)
105{ 106{
106 static const u_int8_t valid_new[] = { 107 static const u_int8_t valid_new[] = {
@@ -110,15 +111,15 @@ static int icmp_new(struct nf_conn *conntrack,
110 [ICMP_ADDRESS] = 1 111 [ICMP_ADDRESS] = 1
111 }; 112 };
112 113
113 if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) 114 if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
114 || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) { 115 || !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) {
115 /* Can't create a new ICMP `conn' with this. */ 116 /* Can't create a new ICMP `conn' with this. */
116 pr_debug("icmp: can't create new conn with type %u\n", 117 pr_debug("icmp: can't create new conn with type %u\n",
117 conntrack->tuplehash[0].tuple.dst.u.icmp.type); 118 ct->tuplehash[0].tuple.dst.u.icmp.type);
118 NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple); 119 NF_CT_DUMP_TUPLE(&ct->tuplehash[0].tuple);
119 return 0; 120 return 0;
120 } 121 }
121 atomic_set(&conntrack->proto.icmp.count, 0); 122 atomic_set(&ct->proto.icmp.count, 0);
122 return 1; 123 return 1;
123} 124}
124 125
@@ -129,8 +130,8 @@ icmp_error_message(struct sk_buff *skb,
129 unsigned int hooknum) 130 unsigned int hooknum)
130{ 131{
131 struct nf_conntrack_tuple innertuple, origtuple; 132 struct nf_conntrack_tuple innertuple, origtuple;
132 struct nf_conntrack_l4proto *innerproto; 133 const struct nf_conntrack_l4proto *innerproto;
133 struct nf_conntrack_tuple_hash *h; 134 const struct nf_conntrack_tuple_hash *h;
134 135
135 NF_CT_ASSERT(skb->nfct == NULL); 136 NF_CT_ASSERT(skb->nfct == NULL);
136 137
@@ -176,7 +177,8 @@ static int
176icmp_error(struct sk_buff *skb, unsigned int dataoff, 177icmp_error(struct sk_buff *skb, unsigned int dataoff,
177 enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum) 178 enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum)
178{ 179{
179 struct icmphdr _ih, *icmph; 180 const struct icmphdr *icmph;
181 struct icmphdr _ih;
180 182
181 /* Not enough header? */ 183 /* Not enough header? */
182 icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); 184 icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index e53ae1ef8f5e..dd07362d2b8f 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -31,7 +31,7 @@
31#include <net/netfilter/nf_conntrack_l3proto.h> 31#include <net/netfilter/nf_conntrack_l3proto.h>
32#include <net/netfilter/nf_conntrack_l4proto.h> 32#include <net/netfilter/nf_conntrack_l4proto.h>
33 33
34static DEFINE_RWLOCK(nf_nat_lock); 34static DEFINE_SPINLOCK(nf_nat_lock);
35 35
36static struct nf_conntrack_l3proto *l3proto __read_mostly; 36static struct nf_conntrack_l3proto *l3proto __read_mostly;
37 37
@@ -154,8 +154,8 @@ find_appropriate_src(const struct nf_conntrack_tuple *tuple,
154 struct nf_conn *ct; 154 struct nf_conn *ct;
155 struct hlist_node *n; 155 struct hlist_node *n;
156 156
157 read_lock_bh(&nf_nat_lock); 157 rcu_read_lock();
158 hlist_for_each_entry(nat, n, &bysource[h], bysource) { 158 hlist_for_each_entry_rcu(nat, n, &bysource[h], bysource) {
159 ct = nat->ct; 159 ct = nat->ct;
160 if (same_src(ct, tuple)) { 160 if (same_src(ct, tuple)) {
161 /* Copy source part from reply tuple. */ 161 /* Copy source part from reply tuple. */
@@ -164,12 +164,12 @@ find_appropriate_src(const struct nf_conntrack_tuple *tuple,
164 result->dst = tuple->dst; 164 result->dst = tuple->dst;
165 165
166 if (in_range(result, range)) { 166 if (in_range(result, range)) {
167 read_unlock_bh(&nf_nat_lock); 167 rcu_read_unlock();
168 return 1; 168 return 1;
169 } 169 }
170 } 170 }
171 } 171 }
172 read_unlock_bh(&nf_nat_lock); 172 rcu_read_unlock();
173 return 0; 173 return 0;
174} 174}
175 175
@@ -330,12 +330,12 @@ nf_nat_setup_info(struct nf_conn *ct,
330 unsigned int srchash; 330 unsigned int srchash;
331 331
332 srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 332 srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
333 write_lock_bh(&nf_nat_lock); 333 spin_lock_bh(&nf_nat_lock);
334 /* nf_conntrack_alter_reply might re-allocate exntension aera */ 334 /* nf_conntrack_alter_reply might re-allocate exntension aera */
335 nat = nfct_nat(ct); 335 nat = nfct_nat(ct);
336 nat->ct = ct; 336 nat->ct = ct;
337 hlist_add_head(&nat->bysource, &bysource[srchash]); 337 hlist_add_head_rcu(&nat->bysource, &bysource[srchash]);
338 write_unlock_bh(&nf_nat_lock); 338 spin_unlock_bh(&nf_nat_lock);
339 } 339 }
340 340
341 /* It's done. */ 341 /* It's done. */
@@ -521,14 +521,14 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto)
521{ 521{
522 int ret = 0; 522 int ret = 0;
523 523
524 write_lock_bh(&nf_nat_lock); 524 spin_lock_bh(&nf_nat_lock);
525 if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) { 525 if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) {
526 ret = -EBUSY; 526 ret = -EBUSY;
527 goto out; 527 goto out;
528 } 528 }
529 rcu_assign_pointer(nf_nat_protos[proto->protonum], proto); 529 rcu_assign_pointer(nf_nat_protos[proto->protonum], proto);
530 out: 530 out:
531 write_unlock_bh(&nf_nat_lock); 531 spin_unlock_bh(&nf_nat_lock);
532 return ret; 532 return ret;
533} 533}
534EXPORT_SYMBOL(nf_nat_protocol_register); 534EXPORT_SYMBOL(nf_nat_protocol_register);
@@ -536,10 +536,10 @@ EXPORT_SYMBOL(nf_nat_protocol_register);
536/* Noone stores the protocol anywhere; simply delete it. */ 536/* Noone stores the protocol anywhere; simply delete it. */
537void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto) 537void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto)
538{ 538{
539 write_lock_bh(&nf_nat_lock); 539 spin_lock_bh(&nf_nat_lock);
540 rcu_assign_pointer(nf_nat_protos[proto->protonum], 540 rcu_assign_pointer(nf_nat_protos[proto->protonum],
541 &nf_nat_unknown_protocol); 541 &nf_nat_unknown_protocol);
542 write_unlock_bh(&nf_nat_lock); 542 spin_unlock_bh(&nf_nat_lock);
543 synchronize_rcu(); 543 synchronize_rcu();
544} 544}
545EXPORT_SYMBOL(nf_nat_protocol_unregister); 545EXPORT_SYMBOL(nf_nat_protocol_unregister);
@@ -594,10 +594,10 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
594 594
595 NF_CT_ASSERT(nat->ct->status & IPS_NAT_DONE_MASK); 595 NF_CT_ASSERT(nat->ct->status & IPS_NAT_DONE_MASK);
596 596
597 write_lock_bh(&nf_nat_lock); 597 spin_lock_bh(&nf_nat_lock);
598 hlist_del(&nat->bysource); 598 hlist_del_rcu(&nat->bysource);
599 nat->ct = NULL; 599 nat->ct = NULL;
600 write_unlock_bh(&nf_nat_lock); 600 spin_unlock_bh(&nf_nat_lock);
601} 601}
602 602
603static void nf_nat_move_storage(struct nf_conn *conntrack, void *old) 603static void nf_nat_move_storage(struct nf_conn *conntrack, void *old)
@@ -609,10 +609,10 @@ static void nf_nat_move_storage(struct nf_conn *conntrack, void *old)
609 if (!ct || !(ct->status & IPS_NAT_DONE_MASK)) 609 if (!ct || !(ct->status & IPS_NAT_DONE_MASK))
610 return; 610 return;
611 611
612 write_lock_bh(&nf_nat_lock); 612 spin_lock_bh(&nf_nat_lock);
613 hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); 613 hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource);
614 new_nat->ct = ct; 614 new_nat->ct = ct;
615 write_unlock_bh(&nf_nat_lock); 615 spin_unlock_bh(&nf_nat_lock);
616} 616}
617 617
618static struct nf_ct_ext_type nat_extend __read_mostly = { 618static struct nf_ct_ext_type nat_extend __read_mostly = {
@@ -646,17 +646,13 @@ static int __init nf_nat_init(void)
646 } 646 }
647 647
648 /* Sew in builtin protocols. */ 648 /* Sew in builtin protocols. */
649 write_lock_bh(&nf_nat_lock); 649 spin_lock_bh(&nf_nat_lock);
650 for (i = 0; i < MAX_IP_NAT_PROTO; i++) 650 for (i = 0; i < MAX_IP_NAT_PROTO; i++)
651 rcu_assign_pointer(nf_nat_protos[i], &nf_nat_unknown_protocol); 651 rcu_assign_pointer(nf_nat_protos[i], &nf_nat_unknown_protocol);
652 rcu_assign_pointer(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp); 652 rcu_assign_pointer(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp);
653 rcu_assign_pointer(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp); 653 rcu_assign_pointer(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp);
654 rcu_assign_pointer(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp); 654 rcu_assign_pointer(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp);
655 write_unlock_bh(&nf_nat_lock); 655 spin_unlock_bh(&nf_nat_lock);
656
657 for (i = 0; i < nf_nat_htable_size; i++) {
658 INIT_HLIST_HEAD(&bysource[i]);
659 }
660 656
661 /* Initialize fake conntrack so that NAT will skip it */ 657 /* Initialize fake conntrack so that NAT will skip it */
662 nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK; 658 nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index a121989fdad7..ee47bf28c825 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -32,7 +32,8 @@ static int set_addr(struct sk_buff *skb,
32 __be32 ip; 32 __be32 ip;
33 __be16 port; 33 __be16 port;
34 } __attribute__ ((__packed__)) buf; 34 } __attribute__ ((__packed__)) buf;
35 struct tcphdr _tcph, *th; 35 const struct tcphdr *th;
36 struct tcphdr _tcph;
36 37
37 buf.ip = ip; 38 buf.ip = ip;
38 buf.port = port; 39 buf.port = port;
@@ -99,7 +100,7 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct,
99 unsigned char **data, 100 unsigned char **data,
100 TransportAddress *taddr, int count) 101 TransportAddress *taddr, int count)
101{ 102{
102 struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; 103 const struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
103 int dir = CTINFO2DIR(ctinfo); 104 int dir = CTINFO2DIR(ctinfo);
104 int i; 105 int i;
105 __be16 port; 106 __be16 port;
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 4c0232842e75..ca57f47bbd25 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -44,8 +44,7 @@ adjust_tcp_sequence(u32 seq,
44 struct nf_nat_seq *this_way, *other_way; 44 struct nf_nat_seq *this_way, *other_way;
45 struct nf_conn_nat *nat = nfct_nat(ct); 45 struct nf_conn_nat *nat = nfct_nat(ct);
46 46
47 pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n", 47 pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n", seq, seq);
48 ntohl(seq), seq);
49 48
50 dir = CTINFO2DIR(ctinfo); 49 dir = CTINFO2DIR(ctinfo);
51 50
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index e63b944a2ebb..3a1e6d6afc0a 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -40,11 +40,11 @@ MODULE_ALIAS("ip_nat_pptp");
40static void pptp_nat_expected(struct nf_conn *ct, 40static void pptp_nat_expected(struct nf_conn *ct,
41 struct nf_conntrack_expect *exp) 41 struct nf_conntrack_expect *exp)
42{ 42{
43 struct nf_conn *master = ct->master; 43 const struct nf_conn *master = ct->master;
44 struct nf_conntrack_expect *other_exp; 44 struct nf_conntrack_expect *other_exp;
45 struct nf_conntrack_tuple t; 45 struct nf_conntrack_tuple t;
46 struct nf_ct_pptp_master *ct_pptp_info; 46 const struct nf_ct_pptp_master *ct_pptp_info;
47 struct nf_nat_pptp *nat_pptp_info; 47 const struct nf_nat_pptp *nat_pptp_info;
48 struct nf_nat_range range; 48 struct nf_nat_range range;
49 49
50 ct_pptp_info = &nfct_help(master)->help.ct_pptp_info; 50 ct_pptp_info = &nfct_help(master)->help.ct_pptp_info;
@@ -186,7 +186,7 @@ static void
186pptp_exp_gre(struct nf_conntrack_expect *expect_orig, 186pptp_exp_gre(struct nf_conntrack_expect *expect_orig,
187 struct nf_conntrack_expect *expect_reply) 187 struct nf_conntrack_expect *expect_reply)
188{ 188{
189 struct nf_conn *ct = expect_orig->master; 189 const struct nf_conn *ct = expect_orig->master;
190 struct nf_ct_pptp_master *ct_pptp_info; 190 struct nf_ct_pptp_master *ct_pptp_info;
191 struct nf_nat_pptp *nat_pptp_info; 191 struct nf_nat_pptp *nat_pptp_info;
192 192
@@ -217,7 +217,7 @@ pptp_inbound_pkt(struct sk_buff *skb,
217 struct PptpControlHeader *ctlh, 217 struct PptpControlHeader *ctlh,
218 union pptp_ctrl_union *pptpReq) 218 union pptp_ctrl_union *pptpReq)
219{ 219{
220 struct nf_nat_pptp *nat_pptp_info; 220 const struct nf_nat_pptp *nat_pptp_info;
221 u_int16_t msg; 221 u_int16_t msg;
222 __be16 new_pcid; 222 __be16 new_pcid;
223 unsigned int pcid_off; 223 unsigned int pcid_off;
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index 9fa272e73113..a1e4da16da2e 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -59,7 +59,7 @@ static int
59gre_unique_tuple(struct nf_conntrack_tuple *tuple, 59gre_unique_tuple(struct nf_conntrack_tuple *tuple,
60 const struct nf_nat_range *range, 60 const struct nf_nat_range *range,
61 enum nf_nat_manip_type maniptype, 61 enum nf_nat_manip_type maniptype,
62 const struct nf_conn *conntrack) 62 const struct nf_conn *ct)
63{ 63{
64 static u_int16_t key; 64 static u_int16_t key;
65 __be16 *keyptr; 65 __be16 *keyptr;
@@ -67,7 +67,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
67 67
68 /* If there is no master conntrack we are not PPTP, 68 /* If there is no master conntrack we are not PPTP,
69 do not change tuples */ 69 do not change tuples */
70 if (!conntrack->master) 70 if (!ct->master)
71 return 0; 71 return 0;
72 72
73 if (maniptype == IP_NAT_MANIP_SRC) 73 if (maniptype == IP_NAT_MANIP_SRC)
@@ -76,7 +76,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
76 keyptr = &tuple->dst.u.gre.key; 76 keyptr = &tuple->dst.u.gre.key;
77 77
78 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { 78 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
79 pr_debug("%p: NATing GRE PPTP\n", conntrack); 79 pr_debug("%p: NATing GRE PPTP\n", ct);
80 min = 1; 80 min = 1;
81 range_size = 0xffff; 81 range_size = 0xffff;
82 } else { 82 } else {
@@ -88,11 +88,11 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
88 88
89 for (i = 0; i < range_size; i++, key++) { 89 for (i = 0; i < range_size; i++, key++) {
90 *keyptr = htons(min + key % range_size); 90 *keyptr = htons(min + key % range_size);
91 if (!nf_nat_used_tuple(tuple, conntrack)) 91 if (!nf_nat_used_tuple(tuple, ct))
92 return 1; 92 return 1;
93 } 93 }
94 94
95 pr_debug("%p: no NAT mapping\n", conntrack); 95 pr_debug("%p: no NAT mapping\n", ct);
96 return 0; 96 return 0;
97} 97}
98 98
@@ -104,7 +104,7 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
104{ 104{
105 struct gre_hdr *greh; 105 struct gre_hdr *greh;
106 struct gre_hdr_pptp *pgreh; 106 struct gre_hdr_pptp *pgreh;
107 struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); 107 const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
108 unsigned int hdroff = iphdroff + iph->ihl * 4; 108 unsigned int hdroff = iphdroff + iph->ihl * 4;
109 109
110 /* pgreh includes two optional 32bit fields which are not required 110 /* pgreh includes two optional 32bit fields which are not required
@@ -148,12 +148,12 @@ static const struct nf_nat_protocol gre = {
148#endif 148#endif
149}; 149};
150 150
151int __init nf_nat_proto_gre_init(void) 151static int __init nf_nat_proto_gre_init(void)
152{ 152{
153 return nf_nat_protocol_register(&gre); 153 return nf_nat_protocol_register(&gre);
154} 154}
155 155
156void __exit nf_nat_proto_gre_fini(void) 156static void __exit nf_nat_proto_gre_fini(void)
157{ 157{
158 nf_nat_protocol_unregister(&gre); 158 nf_nat_protocol_unregister(&gre);
159} 159}
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index a0e44c953cb6..03a02969aa57 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -57,7 +57,7 @@ icmp_manip_pkt(struct sk_buff *skb,
57 const struct nf_conntrack_tuple *tuple, 57 const struct nf_conntrack_tuple *tuple,
58 enum nf_nat_manip_type maniptype) 58 enum nf_nat_manip_type maniptype)
59{ 59{
60 struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); 60 const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
61 struct icmphdr *hdr; 61 struct icmphdr *hdr;
62 unsigned int hdroff = iphdroff + iph->ihl*4; 62 unsigned int hdroff = iphdroff + iph->ihl*4;
63 63
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c
index da23e9fbe679..ffd5d1589eca 100644
--- a/net/ipv4/netfilter/nf_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
@@ -93,7 +93,7 @@ tcp_manip_pkt(struct sk_buff *skb,
93 const struct nf_conntrack_tuple *tuple, 93 const struct nf_conntrack_tuple *tuple,
94 enum nf_nat_manip_type maniptype) 94 enum nf_nat_manip_type maniptype)
95{ 95{
96 struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); 96 const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
97 struct tcphdr *hdr; 97 struct tcphdr *hdr;
98 unsigned int hdroff = iphdroff + iph->ihl*4; 98 unsigned int hdroff = iphdroff + iph->ihl*4;
99 __be32 oldip, newip; 99 __be32 oldip, newip;
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c
index 10df4db078af..4b8f49910ff2 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
@@ -91,7 +91,7 @@ udp_manip_pkt(struct sk_buff *skb,
91 const struct nf_conntrack_tuple *tuple, 91 const struct nf_conntrack_tuple *tuple,
92 enum nf_nat_manip_type maniptype) 92 enum nf_nat_manip_type maniptype)
93{ 93{
94 struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); 94 const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
95 struct udphdr *hdr; 95 struct udphdr *hdr;
96 unsigned int hdroff = iphdroff + iph->ihl*4; 96 unsigned int hdroff = iphdroff + iph->ihl*4;
97 __be32 oldip, newip; 97 __be32 oldip, newip;
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 519182269e76..f8fda57ba20b 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -58,13 +58,14 @@ static struct
58 .term = IPT_ERROR_INIT, /* ERROR */ 58 .term = IPT_ERROR_INIT, /* ERROR */
59}; 59};
60 60
61static struct xt_table nat_table = { 61static struct xt_table __nat_table = {
62 .name = "nat", 62 .name = "nat",
63 .valid_hooks = NAT_VALID_HOOKS, 63 .valid_hooks = NAT_VALID_HOOKS,
64 .lock = RW_LOCK_UNLOCKED, 64 .lock = RW_LOCK_UNLOCKED,
65 .me = THIS_MODULE, 65 .me = THIS_MODULE,
66 .af = AF_INET, 66 .af = AF_INET,
67}; 67};
68static struct xt_table *nat_table;
68 69
69/* Source NAT */ 70/* Source NAT */
70static unsigned int ipt_snat_target(struct sk_buff *skb, 71static unsigned int ipt_snat_target(struct sk_buff *skb,
@@ -214,7 +215,7 @@ int nf_nat_rule_find(struct sk_buff *skb,
214{ 215{
215 int ret; 216 int ret;
216 217
217 ret = ipt_do_table(skb, hooknum, in, out, &nat_table); 218 ret = ipt_do_table(skb, hooknum, in, out, nat_table);
218 219
219 if (ret == NF_ACCEPT) { 220 if (ret == NF_ACCEPT) {
220 if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) 221 if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
@@ -248,9 +249,10 @@ int __init nf_nat_rule_init(void)
248{ 249{
249 int ret; 250 int ret;
250 251
251 ret = ipt_register_table(&nat_table, &nat_initial_table.repl); 252 nat_table = ipt_register_table(&init_net, &__nat_table,
252 if (ret != 0) 253 &nat_initial_table.repl);
253 return ret; 254 if (IS_ERR(nat_table))
255 return PTR_ERR(nat_table);
254 ret = xt_register_target(&ipt_snat_reg); 256 ret = xt_register_target(&ipt_snat_reg);
255 if (ret != 0) 257 if (ret != 0)
256 goto unregister_table; 258 goto unregister_table;
@@ -264,7 +266,7 @@ int __init nf_nat_rule_init(void)
264 unregister_snat: 266 unregister_snat:
265 xt_unregister_target(&ipt_snat_reg); 267 xt_unregister_target(&ipt_snat_reg);
266 unregister_table: 268 unregister_table:
267 ipt_unregister_table(&nat_table); 269 ipt_unregister_table(nat_table);
268 270
269 return ret; 271 return ret;
270} 272}
@@ -273,5 +275,5 @@ void nf_nat_rule_cleanup(void)
273{ 275{
274 xt_unregister_target(&ipt_dnat_reg); 276 xt_unregister_target(&ipt_dnat_reg);
275 xt_unregister_target(&ipt_snat_reg); 277 xt_unregister_target(&ipt_snat_reg);
276 ipt_unregister_table(&nat_table); 278 ipt_unregister_table(nat_table);
277} 279}
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index 606a170bf4ca..b4c8d4968bb2 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -35,9 +35,9 @@ struct addr_map {
35 } addr[IP_CT_DIR_MAX]; 35 } addr[IP_CT_DIR_MAX];
36}; 36};
37 37
38static void addr_map_init(struct nf_conn *ct, struct addr_map *map) 38static void addr_map_init(const struct nf_conn *ct, struct addr_map *map)
39{ 39{
40 struct nf_conntrack_tuple *t; 40 const struct nf_conntrack_tuple *t;
41 enum ip_conntrack_dir dir; 41 enum ip_conntrack_dir dir;
42 unsigned int n; 42 unsigned int n;
43 43
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 07f2a49926d4..540ce6ae887c 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -260,7 +260,7 @@ static unsigned char asn1_eoc_decode(struct asn1_ctx *ctx, unsigned char *eoc)
260{ 260{
261 unsigned char ch; 261 unsigned char ch;
262 262
263 if (eoc == 0) { 263 if (eoc == NULL) {
264 if (!asn1_octet_decode(ctx, &ch)) 264 if (!asn1_octet_decode(ctx, &ch))
265 return 0; 265 return 0;
266 266
diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c
index 1360a94766dd..b096e81500ae 100644
--- a/net/ipv4/netfilter/nf_nat_tftp.c
+++ b/net/ipv4/netfilter/nf_nat_tftp.c
@@ -24,7 +24,7 @@ static unsigned int help(struct sk_buff *skb,
24 enum ip_conntrack_info ctinfo, 24 enum ip_conntrack_info ctinfo,
25 struct nf_conntrack_expect *exp) 25 struct nf_conntrack_expect *exp)
26{ 26{
27 struct nf_conn *ct = exp->master; 27 const struct nf_conn *ct = exp->master;
28 28
29 exp->saved_proto.udp.port 29 exp->saved_proto.udp.port
30 = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; 30 = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 85c08696abbe..a3002fe65b7f 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -352,6 +352,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
352 skb_reserve(skb, hh_len); 352 skb_reserve(skb, hh_len);
353 353
354 skb->priority = sk->sk_priority; 354 skb->priority = sk->sk_priority;
355 skb->mark = sk->sk_mark;
355 skb->dst = dst_clone(&rt->u.dst); 356 skb->dst = dst_clone(&rt->u.dst);
356 357
357 skb_reset_network_header(skb); 358 skb_reset_network_header(skb);
@@ -544,6 +545,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
544 545
545 { 546 {
546 struct flowi fl = { .oif = ipc.oif, 547 struct flowi fl = { .oif = ipc.oif,
548 .mark = sk->sk_mark,
547 .nl_u = { .ip4_u = 549 .nl_u = { .ip4_u =
548 { .daddr = daddr, 550 { .daddr = daddr,
549 .saddr = saddr, 551 .saddr = saddr,
@@ -860,8 +862,7 @@ static struct sock *raw_get_first(struct seq_file *seq)
860 struct hlist_node *node; 862 struct hlist_node *node;
861 863
862 sk_for_each(sk, node, &state->h->ht[state->bucket]) 864 sk_for_each(sk, node, &state->h->ht[state->bucket])
863 if (sk->sk_net == state->p.net && 865 if (sk->sk_net == state->p.net)
864 sk->sk_family == state->family)
865 goto found; 866 goto found;
866 } 867 }
867 sk = NULL; 868 sk = NULL;
@@ -877,8 +878,7 @@ static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk)
877 sk = sk_next(sk); 878 sk = sk_next(sk);
878try_again: 879try_again:
879 ; 880 ;
880 } while (sk && sk->sk_net != state->p.net && 881 } while (sk && sk->sk_net != state->p.net);
881 sk->sk_family != state->family);
882 882
883 if (!sk && ++state->bucket < RAW_HTABLE_SIZE) { 883 if (!sk && ++state->bucket < RAW_HTABLE_SIZE) {
884 sk = sk_head(&state->h->ht[state->bucket]); 884 sk = sk_head(&state->h->ht[state->bucket]);
@@ -927,7 +927,7 @@ void raw_seq_stop(struct seq_file *seq, void *v)
927} 927}
928EXPORT_SYMBOL_GPL(raw_seq_stop); 928EXPORT_SYMBOL_GPL(raw_seq_stop);
929 929
930static __inline__ char *get_raw_sock(struct sock *sp, char *tmpbuf, int i) 930static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
931{ 931{
932 struct inet_sock *inet = inet_sk(sp); 932 struct inet_sock *inet = inet_sk(sp);
933 __be32 dest = inet->daddr, 933 __be32 dest = inet->daddr,
@@ -935,33 +935,23 @@ static __inline__ char *get_raw_sock(struct sock *sp, char *tmpbuf, int i)
935 __u16 destp = 0, 935 __u16 destp = 0,
936 srcp = inet->num; 936 srcp = inet->num;
937 937
938 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" 938 seq_printf(seq, "%4d: %08X:%04X %08X:%04X"
939 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d", 939 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d",
940 i, src, srcp, dest, destp, sp->sk_state, 940 i, src, srcp, dest, destp, sp->sk_state,
941 atomic_read(&sp->sk_wmem_alloc), 941 atomic_read(&sp->sk_wmem_alloc),
942 atomic_read(&sp->sk_rmem_alloc), 942 atomic_read(&sp->sk_rmem_alloc),
943 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), 943 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
944 atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); 944 atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops));
945 return tmpbuf;
946} 945}
947 946
948#define TMPSZ 128
949
950static int raw_seq_show(struct seq_file *seq, void *v) 947static int raw_seq_show(struct seq_file *seq, void *v)
951{ 948{
952 char tmpbuf[TMPSZ+1];
953
954 if (v == SEQ_START_TOKEN) 949 if (v == SEQ_START_TOKEN)
955 seq_printf(seq, "%-*s\n", TMPSZ-1, 950 seq_printf(seq, " sl local_address rem_address st tx_queue "
956 " sl local_address rem_address st tx_queue " 951 "rx_queue tr tm->when retrnsmt uid timeout "
957 "rx_queue tr tm->when retrnsmt uid timeout " 952 "inode drops\n");
958 "inode drops"); 953 else
959 else { 954 raw_sock_seq_show(seq, v, raw_seq_private(seq)->bucket);
960 struct raw_iter_state *state = raw_seq_private(seq);
961
962 seq_printf(seq, "%-*s\n", TMPSZ-1,
963 get_raw_sock(v, tmpbuf, state->bucket));
964 }
965 return 0; 955 return 0;
966} 956}
967 957
@@ -972,27 +962,25 @@ static const struct seq_operations raw_seq_ops = {
972 .show = raw_seq_show, 962 .show = raw_seq_show,
973}; 963};
974 964
975int raw_seq_open(struct inode *ino, struct file *file, struct raw_hashinfo *h, 965int raw_seq_open(struct inode *ino, struct file *file,
976 unsigned short family) 966 struct raw_hashinfo *h, const struct seq_operations *ops)
977{ 967{
978 int err; 968 int err;
979 struct raw_iter_state *i; 969 struct raw_iter_state *i;
980 970
981 err = seq_open_net(ino, file, &raw_seq_ops, 971 err = seq_open_net(ino, file, ops, sizeof(struct raw_iter_state));
982 sizeof(struct raw_iter_state));
983 if (err < 0) 972 if (err < 0)
984 return err; 973 return err;
985 974
986 i = raw_seq_private((struct seq_file *)file->private_data); 975 i = raw_seq_private((struct seq_file *)file->private_data);
987 i->h = h; 976 i->h = h;
988 i->family = family;
989 return 0; 977 return 0;
990} 978}
991EXPORT_SYMBOL_GPL(raw_seq_open); 979EXPORT_SYMBOL_GPL(raw_seq_open);
992 980
993static int raw_v4_seq_open(struct inode *inode, struct file *file) 981static int raw_v4_seq_open(struct inode *inode, struct file *file)
994{ 982{
995 return raw_seq_open(inode, file, &raw_v4_hashinfo, PF_INET); 983 return raw_seq_open(inode, file, &raw_v4_hashinfo, &raw_seq_ops);
996} 984}
997 985
998static const struct file_operations raw_seq_fops = { 986static const struct file_operations raw_seq_fops = {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 896c768e41a2..8842ecb9be48 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -117,8 +117,6 @@
117 117
118#define RT_GC_TIMEOUT (300*HZ) 118#define RT_GC_TIMEOUT (300*HZ)
119 119
120static int ip_rt_min_delay = 2 * HZ;
121static int ip_rt_max_delay = 10 * HZ;
122static int ip_rt_max_size; 120static int ip_rt_max_size;
123static int ip_rt_gc_timeout = RT_GC_TIMEOUT; 121static int ip_rt_gc_timeout = RT_GC_TIMEOUT;
124static int ip_rt_gc_interval = 60 * HZ; 122static int ip_rt_gc_interval = 60 * HZ;
@@ -133,12 +131,9 @@ static int ip_rt_mtu_expires = 10 * 60 * HZ;
133static int ip_rt_min_pmtu = 512 + 20 + 20; 131static int ip_rt_min_pmtu = 512 + 20 + 20;
134static int ip_rt_min_advmss = 256; 132static int ip_rt_min_advmss = 256;
135static int ip_rt_secret_interval = 10 * 60 * HZ; 133static int ip_rt_secret_interval = 10 * 60 * HZ;
136static int ip_rt_flush_expected;
137static unsigned long rt_deadline;
138 134
139#define RTprint(a...) printk(KERN_DEBUG a) 135#define RTprint(a...) printk(KERN_DEBUG a)
140 136
141static struct timer_list rt_flush_timer;
142static void rt_worker_func(struct work_struct *work); 137static void rt_worker_func(struct work_struct *work);
143static DECLARE_DELAYED_WORK(expires_work, rt_worker_func); 138static DECLARE_DELAYED_WORK(expires_work, rt_worker_func);
144static struct timer_list rt_secret_timer; 139static struct timer_list rt_secret_timer;
@@ -169,6 +164,7 @@ static struct dst_ops ipv4_dst_ops = {
169 .update_pmtu = ip_rt_update_pmtu, 164 .update_pmtu = ip_rt_update_pmtu,
170 .local_out = ip_local_out, 165 .local_out = ip_local_out,
171 .entry_size = sizeof(struct rtable), 166 .entry_size = sizeof(struct rtable),
167 .entries = ATOMIC_INIT(0),
172}; 168};
173 169
174#define ECN_OR_COST(class) TC_PRIO_##class 170#define ECN_OR_COST(class) TC_PRIO_##class
@@ -259,19 +255,16 @@ static inline void rt_hash_lock_init(void)
259static struct rt_hash_bucket *rt_hash_table; 255static struct rt_hash_bucket *rt_hash_table;
260static unsigned rt_hash_mask; 256static unsigned rt_hash_mask;
261static unsigned int rt_hash_log; 257static unsigned int rt_hash_log;
262static unsigned int rt_hash_rnd; 258static atomic_t rt_genid;
263 259
264static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); 260static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
265#define RT_CACHE_STAT_INC(field) \ 261#define RT_CACHE_STAT_INC(field) \
266 (__raw_get_cpu_var(rt_cache_stat).field++) 262 (__raw_get_cpu_var(rt_cache_stat).field++)
267 263
268static int rt_intern_hash(unsigned hash, struct rtable *rth,
269 struct rtable **res);
270
271static unsigned int rt_hash_code(u32 daddr, u32 saddr) 264static unsigned int rt_hash_code(u32 daddr, u32 saddr)
272{ 265{
273 return (jhash_2words(daddr, saddr, rt_hash_rnd) 266 return jhash_2words(daddr, saddr, atomic_read(&rt_genid))
274 & rt_hash_mask); 267 & rt_hash_mask;
275} 268}
276 269
277#define rt_hash(daddr, saddr, idx) \ 270#define rt_hash(daddr, saddr, idx) \
@@ -281,27 +274,28 @@ static unsigned int rt_hash_code(u32 daddr, u32 saddr)
281#ifdef CONFIG_PROC_FS 274#ifdef CONFIG_PROC_FS
282struct rt_cache_iter_state { 275struct rt_cache_iter_state {
283 int bucket; 276 int bucket;
277 int genid;
284}; 278};
285 279
286static struct rtable *rt_cache_get_first(struct seq_file *seq) 280static struct rtable *rt_cache_get_first(struct rt_cache_iter_state *st)
287{ 281{
288 struct rtable *r = NULL; 282 struct rtable *r = NULL;
289 struct rt_cache_iter_state *st = seq->private;
290 283
291 for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { 284 for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
292 rcu_read_lock_bh(); 285 rcu_read_lock_bh();
293 r = rt_hash_table[st->bucket].chain; 286 r = rcu_dereference(rt_hash_table[st->bucket].chain);
294 if (r) 287 while (r) {
295 break; 288 if (r->rt_genid == st->genid)
289 return r;
290 r = rcu_dereference(r->u.dst.rt_next);
291 }
296 rcu_read_unlock_bh(); 292 rcu_read_unlock_bh();
297 } 293 }
298 return rcu_dereference(r); 294 return r;
299} 295}
300 296
301static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r) 297static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct rtable *r)
302{ 298{
303 struct rt_cache_iter_state *st = seq->private;
304
305 r = r->u.dst.rt_next; 299 r = r->u.dst.rt_next;
306 while (!r) { 300 while (!r) {
307 rcu_read_unlock_bh(); 301 rcu_read_unlock_bh();
@@ -313,29 +307,38 @@ static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r)
313 return rcu_dereference(r); 307 return rcu_dereference(r);
314} 308}
315 309
316static struct rtable *rt_cache_get_idx(struct seq_file *seq, loff_t pos) 310static struct rtable *rt_cache_get_idx(struct rt_cache_iter_state *st, loff_t pos)
317{ 311{
318 struct rtable *r = rt_cache_get_first(seq); 312 struct rtable *r = rt_cache_get_first(st);
319 313
320 if (r) 314 if (r)
321 while (pos && (r = rt_cache_get_next(seq, r))) 315 while (pos && (r = rt_cache_get_next(st, r))) {
316 if (r->rt_genid != st->genid)
317 continue;
322 --pos; 318 --pos;
319 }
323 return pos ? NULL : r; 320 return pos ? NULL : r;
324} 321}
325 322
326static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) 323static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
327{ 324{
328 return *pos ? rt_cache_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 325 struct rt_cache_iter_state *st = seq->private;
326
327 if (*pos)
328 return rt_cache_get_idx(st, *pos - 1);
329 st->genid = atomic_read(&rt_genid);
330 return SEQ_START_TOKEN;
329} 331}
330 332
331static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos) 333static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
332{ 334{
333 struct rtable *r = NULL; 335 struct rtable *r;
336 struct rt_cache_iter_state *st = seq->private;
334 337
335 if (v == SEQ_START_TOKEN) 338 if (v == SEQ_START_TOKEN)
336 r = rt_cache_get_first(seq); 339 r = rt_cache_get_first(st);
337 else 340 else
338 r = rt_cache_get_next(seq, v); 341 r = rt_cache_get_next(st, v);
339 ++*pos; 342 ++*pos;
340 return r; 343 return r;
341} 344}
@@ -708,6 +711,11 @@ static void rt_check_expire(void)
708 continue; 711 continue;
709 spin_lock_bh(rt_hash_lock_addr(i)); 712 spin_lock_bh(rt_hash_lock_addr(i));
710 while ((rth = *rthp) != NULL) { 713 while ((rth = *rthp) != NULL) {
714 if (rth->rt_genid != atomic_read(&rt_genid)) {
715 *rthp = rth->u.dst.rt_next;
716 rt_free(rth);
717 continue;
718 }
711 if (rth->u.dst.expires) { 719 if (rth->u.dst.expires) {
712 /* Entry is expired even if it is in use */ 720 /* Entry is expired even if it is in use */
713 if (time_before_eq(jiffies, rth->u.dst.expires)) { 721 if (time_before_eq(jiffies, rth->u.dst.expires)) {
@@ -732,83 +740,45 @@ static void rt_check_expire(void)
732 740
733/* 741/*
734 * rt_worker_func() is run in process context. 742 * rt_worker_func() is run in process context.
735 * If a whole flush was scheduled, it is done. 743 * we call rt_check_expire() to scan part of the hash table
736 * Else, we call rt_check_expire() to scan part of the hash table
737 */ 744 */
738static void rt_worker_func(struct work_struct *work) 745static void rt_worker_func(struct work_struct *work)
739{ 746{
740 if (ip_rt_flush_expected) { 747 rt_check_expire();
741 ip_rt_flush_expected = 0;
742 rt_do_flush(1);
743 } else
744 rt_check_expire();
745 schedule_delayed_work(&expires_work, ip_rt_gc_interval); 748 schedule_delayed_work(&expires_work, ip_rt_gc_interval);
746} 749}
747 750
748/* This can run from both BH and non-BH contexts, the latter 751/*
749 * in the case of a forced flush event. 752 * Perturbation of rt_genid by a small quantity [1..256]
753 * Using 8 bits of shuffling ensures we can call rt_cache_invalidate()
754 * many times (2^24) without giving a recent rt_genid.
755 * Jenkins hash is strong enough that little changes of rt_genid are OK.
750 */ 756 */
751static void rt_run_flush(unsigned long process_context) 757static void rt_cache_invalidate(void)
752{ 758{
753 rt_deadline = 0; 759 unsigned char shuffle;
754
755 get_random_bytes(&rt_hash_rnd, 4);
756 760
757 rt_do_flush(process_context); 761 get_random_bytes(&shuffle, sizeof(shuffle));
762 atomic_add(shuffle + 1U, &rt_genid);
758} 763}
759 764
760static DEFINE_SPINLOCK(rt_flush_lock); 765/*
761 766 * delay < 0 : invalidate cache (fast : entries will be deleted later)
767 * delay >= 0 : invalidate & flush cache (can be long)
768 */
762void rt_cache_flush(int delay) 769void rt_cache_flush(int delay)
763{ 770{
764 unsigned long now = jiffies; 771 rt_cache_invalidate();
765 int user_mode = !in_softirq(); 772 if (delay >= 0)
766 773 rt_do_flush(!in_softirq());
767 if (delay < 0)
768 delay = ip_rt_min_delay;
769
770 spin_lock_bh(&rt_flush_lock);
771
772 if (del_timer(&rt_flush_timer) && delay > 0 && rt_deadline) {
773 long tmo = (long)(rt_deadline - now);
774
775 /* If flush timer is already running
776 and flush request is not immediate (delay > 0):
777
778 if deadline is not achieved, prolongate timer to "delay",
779 otherwise fire it at deadline time.
780 */
781
782 if (user_mode && tmo < ip_rt_max_delay-ip_rt_min_delay)
783 tmo = 0;
784
785 if (delay > tmo)
786 delay = tmo;
787 }
788
789 if (delay <= 0) {
790 spin_unlock_bh(&rt_flush_lock);
791 rt_run_flush(user_mode);
792 return;
793 }
794
795 if (rt_deadline == 0)
796 rt_deadline = now + ip_rt_max_delay;
797
798 mod_timer(&rt_flush_timer, now+delay);
799 spin_unlock_bh(&rt_flush_lock);
800} 774}
801 775
802/* 776/*
803 * We change rt_hash_rnd and ask next rt_worker_func() invocation 777 * We change rt_genid and let gc do the cleanup
804 * to perform a flush in process context
805 */ 778 */
806static void rt_secret_rebuild(unsigned long dummy) 779static void rt_secret_rebuild(unsigned long dummy)
807{ 780{
808 get_random_bytes(&rt_hash_rnd, 4); 781 rt_cache_invalidate();
809 ip_rt_flush_expected = 1;
810 cancel_delayed_work(&expires_work);
811 schedule_delayed_work(&expires_work, HZ/10);
812 mod_timer(&rt_secret_timer, jiffies + ip_rt_secret_interval); 782 mod_timer(&rt_secret_timer, jiffies + ip_rt_secret_interval);
813} 783}
814 784
@@ -885,7 +855,8 @@ static int rt_garbage_collect(struct dst_ops *ops)
885 rthp = &rt_hash_table[k].chain; 855 rthp = &rt_hash_table[k].chain;
886 spin_lock_bh(rt_hash_lock_addr(k)); 856 spin_lock_bh(rt_hash_lock_addr(k));
887 while ((rth = *rthp) != NULL) { 857 while ((rth = *rthp) != NULL) {
888 if (!rt_may_expire(rth, tmo, expire)) { 858 if (rth->rt_genid == atomic_read(&rt_genid) &&
859 !rt_may_expire(rth, tmo, expire)) {
889 tmo >>= 1; 860 tmo >>= 1;
890 rthp = &rth->u.dst.rt_next; 861 rthp = &rth->u.dst.rt_next;
891 continue; 862 continue;
@@ -966,6 +937,11 @@ restart:
966 937
967 spin_lock_bh(rt_hash_lock_addr(hash)); 938 spin_lock_bh(rt_hash_lock_addr(hash));
968 while ((rth = *rthp) != NULL) { 939 while ((rth = *rthp) != NULL) {
940 if (rth->rt_genid != atomic_read(&rt_genid)) {
941 *rthp = rth->u.dst.rt_next;
942 rt_free(rth);
943 continue;
944 }
969 if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) { 945 if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) {
970 /* Put it first */ 946 /* Put it first */
971 *rthp = rth->u.dst.rt_next; 947 *rthp = rth->u.dst.rt_next;
@@ -1131,17 +1107,19 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
1131 1107
1132static void rt_del(unsigned hash, struct rtable *rt) 1108static void rt_del(unsigned hash, struct rtable *rt)
1133{ 1109{
1134 struct rtable **rthp; 1110 struct rtable **rthp, *aux;
1135 1111
1112 rthp = &rt_hash_table[hash].chain;
1136 spin_lock_bh(rt_hash_lock_addr(hash)); 1113 spin_lock_bh(rt_hash_lock_addr(hash));
1137 ip_rt_put(rt); 1114 ip_rt_put(rt);
1138 for (rthp = &rt_hash_table[hash].chain; *rthp; 1115 while ((aux = *rthp) != NULL) {
1139 rthp = &(*rthp)->u.dst.rt_next) 1116 if (aux == rt || (aux->rt_genid != atomic_read(&rt_genid))) {
1140 if (*rthp == rt) { 1117 *rthp = aux->u.dst.rt_next;
1141 *rthp = rt->u.dst.rt_next; 1118 rt_free(aux);
1142 rt_free(rt); 1119 continue;
1143 break;
1144 } 1120 }
1121 rthp = &aux->u.dst.rt_next;
1122 }
1145 spin_unlock_bh(rt_hash_lock_addr(hash)); 1123 spin_unlock_bh(rt_hash_lock_addr(hash));
1146} 1124}
1147 1125
@@ -1186,7 +1164,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1186 if (rth->fl.fl4_dst != daddr || 1164 if (rth->fl.fl4_dst != daddr ||
1187 rth->fl.fl4_src != skeys[i] || 1165 rth->fl.fl4_src != skeys[i] ||
1188 rth->fl.oif != ikeys[k] || 1166 rth->fl.oif != ikeys[k] ||
1189 rth->fl.iif != 0) { 1167 rth->fl.iif != 0 ||
1168 rth->rt_genid != atomic_read(&rt_genid)) {
1190 rthp = &rth->u.dst.rt_next; 1169 rthp = &rth->u.dst.rt_next;
1191 continue; 1170 continue;
1192 } 1171 }
@@ -1224,7 +1203,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1224 rt->u.dst.neighbour = NULL; 1203 rt->u.dst.neighbour = NULL;
1225 rt->u.dst.hh = NULL; 1204 rt->u.dst.hh = NULL;
1226 rt->u.dst.xfrm = NULL; 1205 rt->u.dst.xfrm = NULL;
1227 1206 rt->rt_genid = atomic_read(&rt_genid);
1228 rt->rt_flags |= RTCF_REDIRECTED; 1207 rt->rt_flags |= RTCF_REDIRECTED;
1229 1208
1230 /* Gateway is different ... */ 1209 /* Gateway is different ... */
@@ -1445,7 +1424,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
1445 rth->rt_src == iph->saddr && 1424 rth->rt_src == iph->saddr &&
1446 rth->fl.iif == 0 && 1425 rth->fl.iif == 0 &&
1447 !(dst_metric_locked(&rth->u.dst, RTAX_MTU)) && 1426 !(dst_metric_locked(&rth->u.dst, RTAX_MTU)) &&
1448 rth->u.dst.dev->nd_net == net) { 1427 rth->u.dst.dev->nd_net == net &&
1428 rth->rt_genid == atomic_read(&rt_genid)) {
1449 unsigned short mtu = new_mtu; 1429 unsigned short mtu = new_mtu;
1450 1430
1451 if (new_mtu < 68 || new_mtu >= old_mtu) { 1431 if (new_mtu < 68 || new_mtu >= old_mtu) {
@@ -1680,8 +1660,9 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1680 rth->fl.oif = 0; 1660 rth->fl.oif = 0;
1681 rth->rt_gateway = daddr; 1661 rth->rt_gateway = daddr;
1682 rth->rt_spec_dst= spec_dst; 1662 rth->rt_spec_dst= spec_dst;
1683 rth->rt_type = RTN_MULTICAST; 1663 rth->rt_genid = atomic_read(&rt_genid);
1684 rth->rt_flags = RTCF_MULTICAST; 1664 rth->rt_flags = RTCF_MULTICAST;
1665 rth->rt_type = RTN_MULTICAST;
1685 if (our) { 1666 if (our) {
1686 rth->u.dst.input= ip_local_deliver; 1667 rth->u.dst.input= ip_local_deliver;
1687 rth->rt_flags |= RTCF_LOCAL; 1668 rth->rt_flags |= RTCF_LOCAL;
@@ -1820,6 +1801,7 @@ static inline int __mkroute_input(struct sk_buff *skb,
1820 1801
1821 rth->u.dst.input = ip_forward; 1802 rth->u.dst.input = ip_forward;
1822 rth->u.dst.output = ip_output; 1803 rth->u.dst.output = ip_output;
1804 rth->rt_genid = atomic_read(&rt_genid);
1823 1805
1824 rt_set_nexthop(rth, res, itag); 1806 rt_set_nexthop(rth, res, itag);
1825 1807
@@ -1980,6 +1962,7 @@ local_input:
1980 goto e_nobufs; 1962 goto e_nobufs;
1981 1963
1982 rth->u.dst.output= ip_rt_bug; 1964 rth->u.dst.output= ip_rt_bug;
1965 rth->rt_genid = atomic_read(&rt_genid);
1983 1966
1984 atomic_set(&rth->u.dst.__refcnt, 1); 1967 atomic_set(&rth->u.dst.__refcnt, 1);
1985 rth->u.dst.flags= DST_HOST; 1968 rth->u.dst.flags= DST_HOST;
@@ -2071,7 +2054,8 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2071 rth->fl.oif == 0 && 2054 rth->fl.oif == 0 &&
2072 rth->fl.mark == skb->mark && 2055 rth->fl.mark == skb->mark &&
2073 rth->fl.fl4_tos == tos && 2056 rth->fl.fl4_tos == tos &&
2074 rth->u.dst.dev->nd_net == net) { 2057 rth->u.dst.dev->nd_net == net &&
2058 rth->rt_genid == atomic_read(&rt_genid)) {
2075 dst_use(&rth->u.dst, jiffies); 2059 dst_use(&rth->u.dst, jiffies);
2076 RT_CACHE_STAT_INC(in_hit); 2060 RT_CACHE_STAT_INC(in_hit);
2077 rcu_read_unlock(); 2061 rcu_read_unlock();
@@ -2199,6 +2183,7 @@ static inline int __mkroute_output(struct rtable **result,
2199 rth->rt_spec_dst= fl->fl4_src; 2183 rth->rt_spec_dst= fl->fl4_src;
2200 2184
2201 rth->u.dst.output=ip_output; 2185 rth->u.dst.output=ip_output;
2186 rth->rt_genid = atomic_read(&rt_genid);
2202 2187
2203 RT_CACHE_STAT_INC(out_slow_tot); 2188 RT_CACHE_STAT_INC(out_slow_tot);
2204 2189
@@ -2471,7 +2456,8 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
2471 rth->fl.mark == flp->mark && 2456 rth->fl.mark == flp->mark &&
2472 !((rth->fl.fl4_tos ^ flp->fl4_tos) & 2457 !((rth->fl.fl4_tos ^ flp->fl4_tos) &
2473 (IPTOS_RT_MASK | RTO_ONLINK)) && 2458 (IPTOS_RT_MASK | RTO_ONLINK)) &&
2474 rth->u.dst.dev->nd_net == net) { 2459 rth->u.dst.dev->nd_net == net &&
2460 rth->rt_genid == atomic_read(&rt_genid)) {
2475 dst_use(&rth->u.dst, jiffies); 2461 dst_use(&rth->u.dst, jiffies);
2476 RT_CACHE_STAT_INC(out_hit); 2462 RT_CACHE_STAT_INC(out_hit);
2477 rcu_read_unlock_bh(); 2463 rcu_read_unlock_bh();
@@ -2498,6 +2484,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
2498 .check = ipv4_dst_check, 2484 .check = ipv4_dst_check,
2499 .update_pmtu = ipv4_rt_blackhole_update_pmtu, 2485 .update_pmtu = ipv4_rt_blackhole_update_pmtu,
2500 .entry_size = sizeof(struct rtable), 2486 .entry_size = sizeof(struct rtable),
2487 .entries = ATOMIC_INIT(0),
2501}; 2488};
2502 2489
2503 2490
@@ -2525,6 +2512,7 @@ static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock
2525 rt->idev = ort->idev; 2512 rt->idev = ort->idev;
2526 if (rt->idev) 2513 if (rt->idev)
2527 in_dev_hold(rt->idev); 2514 in_dev_hold(rt->idev);
2515 rt->rt_genid = atomic_read(&rt_genid);
2528 rt->rt_flags = ort->rt_flags; 2516 rt->rt_flags = ort->rt_flags;
2529 rt->rt_type = ort->rt_type; 2517 rt->rt_type = ort->rt_type;
2530 rt->rt_dst = ort->rt_dst; 2518 rt->rt_dst = ort->rt_dst;
@@ -2779,6 +2767,8 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
2779 rt = rcu_dereference(rt->u.dst.rt_next), idx++) { 2767 rt = rcu_dereference(rt->u.dst.rt_next), idx++) {
2780 if (idx < s_idx) 2768 if (idx < s_idx)
2781 continue; 2769 continue;
2770 if (rt->rt_genid != atomic_read(&rt_genid))
2771 continue;
2782 skb->dst = dst_clone(&rt->u.dst); 2772 skb->dst = dst_clone(&rt->u.dst);
2783 if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid, 2773 if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
2784 cb->nlh->nlmsg_seq, RTM_NEWROUTE, 2774 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
@@ -2848,24 +2838,6 @@ ctl_table ipv4_route_table[] = {
2848 .strategy = &ipv4_sysctl_rtcache_flush_strategy, 2838 .strategy = &ipv4_sysctl_rtcache_flush_strategy,
2849 }, 2839 },
2850 { 2840 {
2851 .ctl_name = NET_IPV4_ROUTE_MIN_DELAY,
2852 .procname = "min_delay",
2853 .data = &ip_rt_min_delay,
2854 .maxlen = sizeof(int),
2855 .mode = 0644,
2856 .proc_handler = &proc_dointvec_jiffies,
2857 .strategy = &sysctl_jiffies,
2858 },
2859 {
2860 .ctl_name = NET_IPV4_ROUTE_MAX_DELAY,
2861 .procname = "max_delay",
2862 .data = &ip_rt_max_delay,
2863 .maxlen = sizeof(int),
2864 .mode = 0644,
2865 .proc_handler = &proc_dointvec_jiffies,
2866 .strategy = &sysctl_jiffies,
2867 },
2868 {
2869 .ctl_name = NET_IPV4_ROUTE_GC_THRESH, 2841 .ctl_name = NET_IPV4_ROUTE_GC_THRESH,
2870 .procname = "gc_thresh", 2842 .procname = "gc_thresh",
2871 .data = &ipv4_dst_ops.gc_thresh, 2843 .data = &ipv4_dst_ops.gc_thresh,
@@ -3023,8 +2995,8 @@ int __init ip_rt_init(void)
3023{ 2995{
3024 int rc = 0; 2996 int rc = 0;
3025 2997
3026 rt_hash_rnd = (int) ((num_physpages ^ (num_physpages>>8)) ^ 2998 atomic_set(&rt_genid, (int) ((num_physpages ^ (num_physpages>>8)) ^
3027 (jiffies ^ (jiffies >> 7))); 2999 (jiffies ^ (jiffies >> 7))));
3028 3000
3029#ifdef CONFIG_NET_CLS_ROUTE 3001#ifdef CONFIG_NET_CLS_ROUTE
3030 ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct)); 3002 ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct));
@@ -3057,7 +3029,6 @@ int __init ip_rt_init(void)
3057 devinet_init(); 3029 devinet_init();
3058 ip_fib_init(); 3030 ip_fib_init();
3059 3031
3060 setup_timer(&rt_flush_timer, rt_run_flush, 0);
3061 setup_timer(&rt_secret_timer, rt_secret_rebuild, 0); 3032 setup_timer(&rt_secret_timer, rt_secret_rebuild, 0);
3062 3033
3063 /* All the timers, started at system startup tend 3034 /* All the timers, started at system startup tend
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 82cdf23837e3..88286f35d1e2 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -185,7 +185,7 @@ static int strategy_allowed_congestion_control(ctl_table *table, int __user *nam
185 185
186 tcp_get_available_congestion_control(tbl.data, tbl.maxlen); 186 tcp_get_available_congestion_control(tbl.data, tbl.maxlen);
187 ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen); 187 ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen);
188 if (ret == 0 && newval && newlen) 188 if (ret == 1 && newval && newlen)
189 ret = tcp_set_allowed_congestion_control(tbl.data); 189 ret = tcp_set_allowed_congestion_control(tbl.data);
190 kfree(tbl.data); 190 kfree(tbl.data);
191 191
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index fa2c85ca5bc3..19c449f62672 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2153,7 +2153,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int fast_rexmit)
2153 tp->lost_skb_hint = skb; 2153 tp->lost_skb_hint = skb;
2154 tp->lost_cnt_hint = cnt; 2154 tp->lost_cnt_hint = cnt;
2155 2155
2156 if (tcp_is_fack(tp) || 2156 if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
2157 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) 2157 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
2158 cnt += tcp_skb_pcount(skb); 2158 cnt += tcp_skb_pcount(skb);
2159 2159
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 9aea88b8d4fc..77c1939a2b0d 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -369,8 +369,8 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
369 return; 369 return;
370 } 370 }
371 371
372 sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr, 372 sk = inet_lookup(skb->dev->nd_net, &tcp_hashinfo, iph->daddr, th->dest,
373 th->source, inet_iif(skb)); 373 iph->saddr, th->source, inet_iif(skb));
374 if (!sk) { 374 if (!sk) {
375 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); 375 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
376 return; 376 return;
@@ -1503,8 +1503,8 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1503 if (req) 1503 if (req)
1504 return tcp_check_req(sk, skb, req, prev); 1504 return tcp_check_req(sk, skb, req, prev);
1505 1505
1506 nsk = inet_lookup_established(&tcp_hashinfo, iph->saddr, th->source, 1506 nsk = inet_lookup_established(sk->sk_net, &tcp_hashinfo, iph->saddr,
1507 iph->daddr, th->dest, inet_iif(skb)); 1507 th->source, iph->daddr, th->dest, inet_iif(skb));
1508 1508
1509 if (nsk) { 1509 if (nsk) {
1510 if (nsk->sk_state != TCP_TIME_WAIT) { 1510 if (nsk->sk_state != TCP_TIME_WAIT) {
@@ -1661,8 +1661,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
1661 TCP_SKB_CB(skb)->flags = iph->tos; 1661 TCP_SKB_CB(skb)->flags = iph->tos;
1662 TCP_SKB_CB(skb)->sacked = 0; 1662 TCP_SKB_CB(skb)->sacked = 0;
1663 1663
1664 sk = __inet_lookup(&tcp_hashinfo, iph->saddr, th->source, 1664 sk = __inet_lookup(skb->dev->nd_net, &tcp_hashinfo, iph->saddr,
1665 iph->daddr, th->dest, inet_iif(skb)); 1665 th->source, iph->daddr, th->dest, inet_iif(skb));
1666 if (!sk) 1666 if (!sk)
1667 goto no_tcp_socket; 1667 goto no_tcp_socket;
1668 1668
@@ -1735,7 +1735,8 @@ do_time_wait:
1735 } 1735 }
1736 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { 1736 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1737 case TCP_TW_SYN: { 1737 case TCP_TW_SYN: {
1738 struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, 1738 struct sock *sk2 = inet_lookup_listener(skb->dev->nd_net,
1739 &tcp_hashinfo,
1739 iph->daddr, th->dest, 1740 iph->daddr, th->dest,
1740 inet_iif(skb)); 1741 inet_iif(skb));
1741 if (sk2) { 1742 if (sk2) {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 89f0188885c7..ed750f9ceb07 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2564,5 +2564,4 @@ EXPORT_SYMBOL(tcp_connect);
2564EXPORT_SYMBOL(tcp_make_synack); 2564EXPORT_SYMBOL(tcp_make_synack);
2565EXPORT_SYMBOL(tcp_simple_retransmit); 2565EXPORT_SYMBOL(tcp_simple_retransmit);
2566EXPORT_SYMBOL(tcp_sync_mss); 2566EXPORT_SYMBOL(tcp_sync_mss);
2567EXPORT_SYMBOL(sysctl_tcp_tso_win_divisor);
2568EXPORT_SYMBOL(tcp_mtup_init); 2567EXPORT_SYMBOL(tcp_mtup_init);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 2fb8d731026b..7ea1b67b6de1 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -130,14 +130,14 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min);
130atomic_t udp_memory_allocated; 130atomic_t udp_memory_allocated;
131EXPORT_SYMBOL(udp_memory_allocated); 131EXPORT_SYMBOL(udp_memory_allocated);
132 132
133static inline int __udp_lib_lport_inuse(__u16 num, 133static inline int __udp_lib_lport_inuse(struct net *net, __u16 num,
134 const struct hlist_head udptable[]) 134 const struct hlist_head udptable[])
135{ 135{
136 struct sock *sk; 136 struct sock *sk;
137 struct hlist_node *node; 137 struct hlist_node *node;
138 138
139 sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) 139 sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)])
140 if (sk->sk_hash == num) 140 if (sk->sk_net == net && sk->sk_hash == num)
141 return 1; 141 return 1;
142 return 0; 142 return 0;
143} 143}
@@ -159,6 +159,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
159 struct hlist_head *head; 159 struct hlist_head *head;
160 struct sock *sk2; 160 struct sock *sk2;
161 int error = 1; 161 int error = 1;
162 struct net *net = sk->sk_net;
162 163
163 write_lock_bh(&udp_hash_lock); 164 write_lock_bh(&udp_hash_lock);
164 165
@@ -198,7 +199,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
198 /* 2nd pass: find hole in shortest hash chain */ 199 /* 2nd pass: find hole in shortest hash chain */
199 rover = best; 200 rover = best;
200 for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) { 201 for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) {
201 if (! __udp_lib_lport_inuse(rover, udptable)) 202 if (! __udp_lib_lport_inuse(net, rover, udptable))
202 goto gotit; 203 goto gotit;
203 rover += UDP_HTABLE_SIZE; 204 rover += UDP_HTABLE_SIZE;
204 if (rover > high) 205 if (rover > high)
@@ -218,6 +219,7 @@ gotit:
218 sk_for_each(sk2, node, head) 219 sk_for_each(sk2, node, head)
219 if (sk2->sk_hash == snum && 220 if (sk2->sk_hash == snum &&
220 sk2 != sk && 221 sk2 != sk &&
222 sk2->sk_net == net &&
221 (!sk2->sk_reuse || !sk->sk_reuse) && 223 (!sk2->sk_reuse || !sk->sk_reuse) &&
222 (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if 224 (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
223 || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && 225 || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
@@ -261,9 +263,9 @@ static inline int udp_v4_get_port(struct sock *sk, unsigned short snum)
261/* UDP is nearly always wildcards out the wazoo, it makes no sense to try 263/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
262 * harder than this. -DaveM 264 * harder than this. -DaveM
263 */ 265 */
264static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport, 266static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
265 __be32 daddr, __be16 dport, 267 __be16 sport, __be32 daddr, __be16 dport,
266 int dif, struct hlist_head udptable[]) 268 int dif, struct hlist_head udptable[])
267{ 269{
268 struct sock *sk, *result = NULL; 270 struct sock *sk, *result = NULL;
269 struct hlist_node *node; 271 struct hlist_node *node;
@@ -274,7 +276,8 @@ static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport,
274 sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { 276 sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
275 struct inet_sock *inet = inet_sk(sk); 277 struct inet_sock *inet = inet_sk(sk);
276 278
277 if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) { 279 if (sk->sk_net == net && sk->sk_hash == hnum &&
280 !ipv6_only_sock(sk)) {
278 int score = (sk->sk_family == PF_INET ? 1 : 0); 281 int score = (sk->sk_family == PF_INET ? 1 : 0);
279 if (inet->rcv_saddr) { 282 if (inet->rcv_saddr) {
280 if (inet->rcv_saddr != daddr) 283 if (inet->rcv_saddr != daddr)
@@ -361,8 +364,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
361 int harderr; 364 int harderr;
362 int err; 365 int err;
363 366
364 sk = __udp4_lib_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, 367 sk = __udp4_lib_lookup(skb->dev->nd_net, iph->daddr, uh->dest,
365 skb->dev->ifindex, udptable ); 368 iph->saddr, uh->source, skb->dev->ifindex, udptable);
366 if (sk == NULL) { 369 if (sk == NULL) {
367 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); 370 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
368 return; /* No socket for error */ 371 return; /* No socket for error */
@@ -1185,8 +1188,8 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
1185 if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) 1188 if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
1186 return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); 1189 return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
1187 1190
1188 sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest, 1191 sk = __udp4_lib_lookup(skb->dev->nd_net, saddr, uh->source, daddr,
1189 inet_iif(skb), udptable); 1192 uh->dest, inet_iif(skb), udptable);
1190 1193
1191 if (sk != NULL) { 1194 if (sk != NULL) {
1192 int ret = 0; 1195 int ret = 0;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 3783e3ee56a4..10ed70491434 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -247,6 +247,7 @@ static struct dst_ops xfrm4_dst_ops = {
247 .local_out = __ip_local_out, 247 .local_out = __ip_local_out,
248 .gc_thresh = 1024, 248 .gc_thresh = 1024,
249 .entry_size = sizeof(struct xfrm_dst), 249 .entry_size = sizeof(struct xfrm_dst),
250 .entries = ATOMIC_INIT(0),
250}; 251};
251 252
252static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { 253static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 326845195620..41f5982d2087 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -38,7 +38,7 @@ static void ipip_destroy(struct xfrm_state *x)
38{ 38{
39} 39}
40 40
41static struct xfrm_type ipip_type = { 41static const struct xfrm_type ipip_type = {
42 .description = "IPIP", 42 .description = "IPIP",
43 .owner = THIS_MODULE, 43 .owner = THIS_MODULE,
44 .proto = IPPROTO_IPIP, 44 .proto = IPPROTO_IPIP,
@@ -50,7 +50,7 @@ static struct xfrm_type ipip_type = {
50 50
51static int xfrm_tunnel_rcv(struct sk_buff *skb) 51static int xfrm_tunnel_rcv(struct sk_buff *skb)
52{ 52{
53 return xfrm4_rcv_spi(skb, IPPROTO_IP, ip_hdr(skb)->saddr); 53 return xfrm4_rcv_spi(skb, IPPROTO_IPIP, ip_hdr(skb)->saddr);
54} 54}
55 55
56static int xfrm_tunnel_err(struct sk_buff *skb, u32 info) 56static int xfrm_tunnel_err(struct sk_buff *skb, u32 info)