Diffstat (limited to 'net/ipv4')
 47 files changed, 1102 insertions(+), 748 deletions(-)
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 24e2b7294bf8..19880b086e71 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -343,6 +343,7 @@ config INET_ESP
 	tristate "IP: ESP transformation"
 	select XFRM
 	select CRYPTO
+	select CRYPTO_AEAD
 	select CRYPTO_HMAC
 	select CRYPTO_MD5
 	select CRYPTO_CBC
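The new CRYPTO_AEAD select matches the esp4.c rework below: ESP no longer drives a block cipher and an HMAC transform separately, but allocates one combined AEAD transform, using the "authenc" template when the SA is configured with a classic cipher/auth pair. A minimal userspace sketch of how that template name is composed (the concrete algorithm names are illustrative, not mandated by this patch):

#include <stdio.h>

#define CRYPTO_MAX_ALG_NAME 64	/* as in <linux/crypto.h> of this era */

int main(void)
{
	char authenc_name[CRYPTO_MAX_ALG_NAME];

	/* e.g. an SA configured with ealg=cbc(aes), aalg=hmac(sha1) */
	snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, "authenc(%s,%s)",
		 "hmac(sha1)", "cbc(aes)");
	/* -> "authenc(hmac(sha1),cbc(aes))", the kind of name passed to
	 * crypto_alloc_aead() in esp_init_authenc() below */
	puts(authenc_name);
	return 0;
}

esp_init_authenc() in the esp4.c diff builds exactly this kind of name from x->ealg and x->aalg, falling back to "digest_null" when no auth algorithm is set.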
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index d76803a3dcae..9d4555ec0b59 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -300,7 +300,7 @@ static void ah_destroy(struct xfrm_state *x)
 }
 
 
-static struct xfrm_type ah_type =
+static const struct xfrm_type ah_type =
 {
 	.description = "AH4",
 	.owner = THIS_MODULE,
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 5976c598cc4b..8e17f65f4002 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -558,8 +558,9 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt)
  */
 struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
 			   struct net_device *dev, __be32 src_ip,
-			   unsigned char *dest_hw, unsigned char *src_hw,
-			   unsigned char *target_hw)
+			   const unsigned char *dest_hw,
+			   const unsigned char *src_hw,
+			   const unsigned char *target_hw)
 {
 	struct sk_buff *skb;
 	struct arphdr *arp;
@@ -672,8 +673,8 @@ void arp_xmit(struct sk_buff *skb)
  */
 void arp_send(int type, int ptype, __be32 dest_ip,
 	      struct net_device *dev, __be32 src_ip,
-	      unsigned char *dest_hw, unsigned char *src_hw,
-	      unsigned char *target_hw)
+	      const unsigned char *dest_hw, const unsigned char *src_hw,
+	      const unsigned char *target_hw)
 {
 	struct sk_buff *skb;
 
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 21f71bf912d5..f282b26f63eb 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -64,7 +64,7 @@
 #include <net/rtnetlink.h>
 #include <net/net_namespace.h>
 
-struct ipv4_devconf ipv4_devconf = {
+static struct ipv4_devconf ipv4_devconf = {
 	.data = {
 		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
 		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
@@ -485,46 +485,41 @@ errout:
 	return err;
 }
 
-static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
+static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
 {
 	struct nlattr *tb[IFA_MAX+1];
 	struct in_ifaddr *ifa;
 	struct ifaddrmsg *ifm;
 	struct net_device *dev;
 	struct in_device *in_dev;
-	int err = -EINVAL;
+	int err;
 
 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
 	if (err < 0)
 		goto errout;
 
 	ifm = nlmsg_data(nlh);
-	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) {
-		err = -EINVAL;
+	err = -EINVAL;
+	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
 		goto errout;
-	}
 
-	dev = __dev_get_by_index(&init_net, ifm->ifa_index);
-	if (dev == NULL) {
-		err = -ENODEV;
+	dev = __dev_get_by_index(net, ifm->ifa_index);
+	err = -ENODEV;
+	if (dev == NULL)
 		goto errout;
-	}
 
 	in_dev = __in_dev_get_rtnl(dev);
-	if (in_dev == NULL) {
-		err = -ENOBUFS;
+	err = -ENOBUFS;
+	if (in_dev == NULL)
 		goto errout;
-	}
 
 	ifa = inet_alloc_ifa();
-	if (ifa == NULL) {
+	if (ifa == NULL)
 		/*
 		 * A potential indev allocation can be left alive, it stays
 		 * assigned to its device and is destroy with it.
 		 */
-		err = -ENOBUFS;
 		goto errout;
-	}
 
 	ipv4_devconf_setall(in_dev);
 	in_dev_hold(in_dev);
@@ -568,7 +563,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
 	if (net != &init_net)
 		return -EINVAL;
 
-	ifa = rtm_to_ifaddr(nlh);
+	ifa = rtm_to_ifaddr(net, nlh);
 	if (IS_ERR(ifa))
 		return PTR_ERR(ifa);
 
@@ -1182,7 +1177,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 
 	s_ip_idx = ip_idx = cb->args[1];
 	idx = 0;
-	for_each_netdev(&init_net, dev) {
+	for_each_netdev(net, dev) {
 		if (idx < s_idx)
 			goto cont;
 		if (idx > s_idx)
@@ -1216,7 +1211,9 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
 	struct sk_buff *skb;
 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
 	int err = -ENOBUFS;
+	struct net *net;
 
+	net = ifa->ifa_dev->dev->nd_net;
 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
 	if (skb == NULL)
 		goto errout;
@@ -1228,10 +1225,10 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, &init_net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
+	err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
 errout:
 	if (err < 0)
-		rtnl_set_sk_err(&init_net, RTNLGRP_IPV4_IFADDR, err);
+		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
 }
 
 #ifdef CONFIG_SYSCTL
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 28ea5c77ca23..258d17631b4b 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -1,27 +1,118 @@
+#include <crypto/aead.h>
+#include <crypto/authenc.h>
 #include <linux/err.h>
 #include <linux/module.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
 #include <net/esp.h>
 #include <linux/scatterlist.h>
-#include <linux/crypto.h>
 #include <linux/kernel.h>
 #include <linux/pfkeyv2.h>
-#include <linux/random.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/in6.h>
 #include <net/icmp.h>
 #include <net/protocol.h>
 #include <net/udp.h>
 
+struct esp_skb_cb {
+	struct xfrm_skb_cb xfrm;
+	void *tmp;
+};
+
+#define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0]))
+
+/*
+ * Allocate an AEAD request structure with extra space for SG and IV.
+ *
+ * For alignment considerations the IV is placed at the front, followed
+ * by the request and finally the SG list.
+ *
+ * TODO: Use spare space in skb for this where possible.
+ */
+static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags)
+{
+	unsigned int len;
+
+	len = crypto_aead_ivsize(aead);
+	if (len) {
+		len += crypto_aead_alignmask(aead) &
+		       ~(crypto_tfm_ctx_alignment() - 1);
+		len = ALIGN(len, crypto_tfm_ctx_alignment());
+	}
+
+	len += sizeof(struct aead_givcrypt_request) + crypto_aead_reqsize(aead);
+	len = ALIGN(len, __alignof__(struct scatterlist));
+
+	len += sizeof(struct scatterlist) * nfrags;
+
+	return kmalloc(len, GFP_ATOMIC);
+}
+
+static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp)
+{
+	return crypto_aead_ivsize(aead) ?
+	       PTR_ALIGN((u8 *)tmp, crypto_aead_alignmask(aead) + 1) : tmp;
+}
+
+static inline struct aead_givcrypt_request *esp_tmp_givreq(
+	struct crypto_aead *aead, u8 *iv)
+{
+	struct aead_givcrypt_request *req;
+
+	req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead),
+				crypto_tfm_ctx_alignment());
+	aead_givcrypt_set_tfm(req, aead);
+	return req;
+}
+
+static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv)
+{
+	struct aead_request *req;
+
+	req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead),
+				crypto_tfm_ctx_alignment());
+	aead_request_set_tfm(req, aead);
+	return req;
+}
+
+static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
+					     struct aead_request *req)
+{
+	return (void *)ALIGN((unsigned long)(req + 1) +
+			     crypto_aead_reqsize(aead),
+			     __alignof__(struct scatterlist));
+}
+
+static inline struct scatterlist *esp_givreq_sg(
+	struct crypto_aead *aead, struct aead_givcrypt_request *req)
+{
+	return (void *)ALIGN((unsigned long)(req + 1) +
+			     crypto_aead_reqsize(aead),
+			     __alignof__(struct scatterlist));
+}
+
+static void esp_output_done(struct crypto_async_request *base, int err)
+{
+	struct sk_buff *skb = base->data;
+
+	kfree(ESP_SKB_CB(skb)->tmp);
+	xfrm_output_resume(skb, err);
+}
+
 static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int err;
 	struct ip_esp_hdr *esph;
-	struct crypto_blkcipher *tfm;
-	struct blkcipher_desc desc;
+	struct crypto_aead *aead;
+	struct aead_givcrypt_request *req;
+	struct scatterlist *sg;
+	struct scatterlist *asg;
 	struct esp_data *esp;
 	struct sk_buff *trailer;
+	void *tmp;
+	u8 *iv;
 	u8 *tail;
 	int blksize;
 	int clen;
@@ -36,18 +127,27 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	clen = skb->len;
 
 	esp = x->data;
-	alen = esp->auth.icv_trunc_len;
-	tfm = esp->conf.tfm;
-	desc.tfm = tfm;
-	desc.flags = 0;
-	blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4);
+	aead = esp->aead;
+	alen = crypto_aead_authsize(aead);
+
+	blksize = ALIGN(crypto_aead_blocksize(aead), 4);
 	clen = ALIGN(clen + 2, blksize);
-	if (esp->conf.padlen)
-		clen = ALIGN(clen, esp->conf.padlen);
+	if (esp->padlen)
+		clen = ALIGN(clen, esp->padlen);
+
+	if ((err = skb_cow_data(skb, clen - skb->len + alen, &trailer)) < 0)
+		goto error;
+	nfrags = err;
 
-	if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0)
+	tmp = esp_alloc_tmp(aead, nfrags + 1);
+	if (!tmp)
 		goto error;
 
+	iv = esp_tmp_iv(aead, tmp);
+	req = esp_tmp_givreq(aead, iv);
+	asg = esp_givreq_sg(aead, req);
+	sg = asg + 1;
+
 	/* Fill padding... */
 	tail = skb_tail_pointer(trailer);
 	do {
@@ -56,28 +156,34 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 			tail[i] = i + 1;
 	} while (0);
 	tail[clen - skb->len - 2] = (clen - skb->len) - 2;
-	pskb_put(skb, trailer, clen - skb->len);
+	tail[clen - skb->len - 1] = *skb_mac_header(skb);
+	pskb_put(skb, trailer, clen - skb->len + alen);
 
 	skb_push(skb, -skb_network_offset(skb));
 	esph = ip_esp_hdr(skb);
-	*(skb_tail_pointer(trailer) - 1) = *skb_mac_header(skb);
 	*skb_mac_header(skb) = IPPROTO_ESP;
 
-	spin_lock_bh(&x->lock);
-
 	/* this is non-NULL only with UDP Encapsulation */
 	if (x->encap) {
 		struct xfrm_encap_tmpl *encap = x->encap;
 		struct udphdr *uh;
 		__be32 *udpdata32;
+		unsigned int sport, dport;
+		int encap_type;
+
+		spin_lock_bh(&x->lock);
+		sport = encap->encap_sport;
+		dport = encap->encap_dport;
+		encap_type = encap->encap_type;
+		spin_unlock_bh(&x->lock);
 
 		uh = (struct udphdr *)esph;
-		uh->source = encap->encap_sport;
-		uh->dest = encap->encap_dport;
-		uh->len = htons(skb->len + alen - skb_transport_offset(skb));
+		uh->source = sport;
+		uh->dest = dport;
+		uh->len = htons(skb->len - skb_transport_offset(skb));
 		uh->check = 0;
 
-		switch (encap->encap_type) {
+		switch (encap_type) {
 		default:
 		case UDP_ENCAP_ESPINUDP:
 			esph = (struct ip_esp_hdr *)(uh + 1);
@@ -95,131 +201,45 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	esph->spi = x->id.spi;
 	esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq);
 
-	if (esp->conf.ivlen) {
-		if (unlikely(!esp->conf.ivinitted)) {
-			get_random_bytes(esp->conf.ivec, esp->conf.ivlen);
-			esp->conf.ivinitted = 1;
-		}
-		crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen);
-	}
-
-	do {
-		struct scatterlist *sg = &esp->sgbuf[0];
-
-		if (unlikely(nfrags > ESP_NUM_FAST_SG)) {
-			sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
-			if (!sg)
-				goto unlock;
-		}
-		sg_init_table(sg, nfrags);
-		skb_to_sgvec(skb, sg,
-			     esph->enc_data +
-			     esp->conf.ivlen -
-			     skb->data, clen);
-		err = crypto_blkcipher_encrypt(&desc, sg, sg, clen);
-		if (unlikely(sg != &esp->sgbuf[0]))
-			kfree(sg);
-	} while (0);
-
-	if (unlikely(err))
-		goto unlock;
-
-	if (esp->conf.ivlen) {
-		memcpy(esph->enc_data, esp->conf.ivec, esp->conf.ivlen);
-		crypto_blkcipher_get_iv(tfm, esp->conf.ivec, esp->conf.ivlen);
-	}
+	sg_init_table(sg, nfrags);
+	skb_to_sgvec(skb, sg,
+		     esph->enc_data + crypto_aead_ivsize(aead) - skb->data,
+		     clen + alen);
+	sg_init_one(asg, esph, sizeof(*esph));
+
+	aead_givcrypt_set_callback(req, 0, esp_output_done, skb);
+	aead_givcrypt_set_crypt(req, sg, sg, clen, iv);
+	aead_givcrypt_set_assoc(req, asg, sizeof(*esph));
+	aead_givcrypt_set_giv(req, esph->enc_data, XFRM_SKB_CB(skb)->seq);
+
+	ESP_SKB_CB(skb)->tmp = tmp;
+	err = crypto_aead_givencrypt(req);
+	if (err == -EINPROGRESS)
+		goto error;
 
-	if (esp->auth.icv_full_len) {
-		err = esp_mac_digest(esp, skb, (u8 *)esph - skb->data,
-				     sizeof(*esph) + esp->conf.ivlen + clen);
-		memcpy(pskb_put(skb, trailer, alen), esp->auth.work_icv, alen);
-	}
+	if (err == -EBUSY)
+		err = NET_XMIT_DROP;
 
-unlock:
-	spin_unlock_bh(&x->lock);
+	kfree(tmp);
 
 error:
 	return err;
 }
 
-/*
- * Note: detecting truncated vs. non-truncated authentication data is very
- * expensive, so we only support truncated data, which is the recommended
- * and common case.
- */
-static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
+static int esp_input_done2(struct sk_buff *skb, int err)
 {
 	struct iphdr *iph;
-	struct ip_esp_hdr *esph;
+	struct xfrm_state *x = xfrm_input_state(skb);
 	struct esp_data *esp = x->data;
-	struct crypto_blkcipher *tfm = esp->conf.tfm;
-	struct blkcipher_desc desc = { .tfm = tfm };
-	struct sk_buff *trailer;
-	int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4);
-	int alen = esp->auth.icv_trunc_len;
-	int elen = skb->len - sizeof(*esph) - esp->conf.ivlen - alen;
-	int nfrags;
+	struct crypto_aead *aead = esp->aead;
+	int alen = crypto_aead_authsize(aead);
+	int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
+	int elen = skb->len - hlen;
 	int ihl;
 	u8 nexthdr[2];
-	struct scatterlist *sg;
 	int padlen;
-	int err = -EINVAL;
-
-	if (!pskb_may_pull(skb, sizeof(*esph)))
-		goto out;
-
-	if (elen <= 0 || (elen & (blksize-1)))
-		goto out;
-
-	if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
-		goto out;
-	nfrags = err;
-
-	skb->ip_summed = CHECKSUM_NONE;
-
-	spin_lock(&x->lock);
-
-	/* If integrity check is required, do this. */
-	if (esp->auth.icv_full_len) {
-		u8 sum[alen];
 
-		err = esp_mac_digest(esp, skb, 0, skb->len - alen);
-		if (err)
-			goto unlock;
-
-		if (skb_copy_bits(skb, skb->len - alen, sum, alen))
-			BUG();
-
-		if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) {
-			err = -EBADMSG;
-			goto unlock;
-		}
-	}
-
-	esph = (struct ip_esp_hdr *)skb->data;
-
-	/* Get ivec. This can be wrong, check against another impls. */
-	if (esp->conf.ivlen)
-		crypto_blkcipher_set_iv(tfm, esph->enc_data, esp->conf.ivlen);
-
-	sg = &esp->sgbuf[0];
-
-	if (unlikely(nfrags > ESP_NUM_FAST_SG)) {
-		err = -ENOMEM;
-		sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
-		if (!sg)
-			goto unlock;
-	}
-	sg_init_table(sg, nfrags);
-	skb_to_sgvec(skb, sg,
-		     sizeof(*esph) + esp->conf.ivlen,
-		     elen);
-	err = crypto_blkcipher_decrypt(&desc, sg, sg, elen);
-	if (unlikely(sg != &esp->sgbuf[0]))
-		kfree(sg);
-
-unlock:
-	spin_unlock(&x->lock);
+	kfree(ESP_SKB_CB(skb)->tmp);
 
 	if (unlikely(err))
 		goto out;
@@ -229,15 +249,11 @@ unlock:
 
 	err = -EINVAL;
 	padlen = nexthdr[0];
-	if (padlen+2 >= elen)
+	if (padlen + 2 + alen >= elen)
 		goto out;
 
 	/* ... check padding bits here. Silly. :-) */
 
-	/* RFC4303: Drop dummy packets without any error */
-	if (nexthdr[1] == IPPROTO_NONE)
-		goto out;
-
 	iph = ip_hdr(skb);
 	ihl = iph->ihl * 4;
 
@@ -279,10 +295,87 @@ unlock:
 	}
 
 	pskb_trim(skb, skb->len - alen - padlen - 2);
-	__skb_pull(skb, sizeof(*esph) + esp->conf.ivlen);
+	__skb_pull(skb, hlen);
 	skb_set_transport_header(skb, -ihl);
 
-	return nexthdr[1];
+	err = nexthdr[1];
+
+	/* RFC4303: Drop dummy packets without any error */
+	if (err == IPPROTO_NONE)
+		err = -EINVAL;
+
+out:
+	return err;
+}
+
+static void esp_input_done(struct crypto_async_request *base, int err)
+{
+	struct sk_buff *skb = base->data;
+
+	xfrm_input_resume(skb, esp_input_done2(skb, err));
+}
+
+/*
+ * Note: detecting truncated vs. non-truncated authentication data is very
+ * expensive, so we only support truncated data, which is the recommended
+ * and common case.
+ */
+static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ip_esp_hdr *esph;
+	struct esp_data *esp = x->data;
+	struct crypto_aead *aead = esp->aead;
+	struct aead_request *req;
+	struct sk_buff *trailer;
+	int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead);
+	int nfrags;
+	void *tmp;
+	u8 *iv;
+	struct scatterlist *sg;
+	struct scatterlist *asg;
+	int err = -EINVAL;
+
+	if (!pskb_may_pull(skb, sizeof(*esph)))
+		goto out;
+
+	if (elen <= 0)
+		goto out;
+
+	if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
+		goto out;
+	nfrags = err;
+
+	err = -ENOMEM;
+	tmp = esp_alloc_tmp(aead, nfrags + 1);
+	if (!tmp)
+		goto out;
+
+	ESP_SKB_CB(skb)->tmp = tmp;
+	iv = esp_tmp_iv(aead, tmp);
+	req = esp_tmp_req(aead, iv);
+	asg = esp_req_sg(aead, req);
+	sg = asg + 1;
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	esph = (struct ip_esp_hdr *)skb->data;
+
+	/* Get ivec. This can be wrong, check against another impls. */
+	iv = esph->enc_data;
+
+	sg_init_table(sg, nfrags);
+	skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen);
+	sg_init_one(asg, esph, sizeof(*esph));
+
+	aead_request_set_callback(req, 0, esp_input_done, skb);
+	aead_request_set_crypt(req, sg, sg, elen, iv);
+	aead_request_set_assoc(req, asg, sizeof(*esph));
+
+	err = crypto_aead_decrypt(req);
+	if (err == -EINPROGRESS)
+		goto out;
+
+	err = esp_input_done2(skb, err);
 
 out:
 	return err;
@@ -291,11 +384,11 @@ out:
 static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
 {
 	struct esp_data *esp = x->data;
-	u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
-	u32 align = max_t(u32, blksize, esp->conf.padlen);
+	u32 blksize = ALIGN(crypto_aead_blocksize(esp->aead), 4);
+	u32 align = max_t(u32, blksize, esp->padlen);
 	u32 rem;
 
-	mtu -= x->props.header_len + esp->auth.icv_trunc_len;
+	mtu -= x->props.header_len + crypto_aead_authsize(esp->aead);
 	rem = mtu & (align - 1);
 	mtu &= ~(align - 1);
 
@@ -342,80 +435,143 @@ static void esp_destroy(struct xfrm_state *x)
 	if (!esp)
 		return;
 
-	crypto_free_blkcipher(esp->conf.tfm);
-	esp->conf.tfm = NULL;
-	kfree(esp->conf.ivec);
-	esp->conf.ivec = NULL;
-	crypto_free_hash(esp->auth.tfm);
-	esp->auth.tfm = NULL;
-	kfree(esp->auth.work_icv);
-	esp->auth.work_icv = NULL;
+	crypto_free_aead(esp->aead);
 	kfree(esp);
 }
 
-static int esp_init_state(struct xfrm_state *x)
+static int esp_init_aead(struct xfrm_state *x)
 {
-	struct esp_data *esp = NULL;
-	struct crypto_blkcipher *tfm;
-	u32 align;
+	struct esp_data *esp = x->data;
+	struct crypto_aead *aead;
+	int err;
+
+	aead = crypto_alloc_aead(x->aead->alg_name, 0, 0);
+	err = PTR_ERR(aead);
+	if (IS_ERR(aead))
+		goto error;
+
+	esp->aead = aead;
+
+	err = crypto_aead_setkey(aead, x->aead->alg_key,
+				 (x->aead->alg_key_len + 7) / 8);
+	if (err)
+		goto error;
+
+	err = crypto_aead_setauthsize(aead, x->aead->alg_icv_len / 8);
+	if (err)
+		goto error;
+
+error:
+	return err;
+}
 
+static int esp_init_authenc(struct xfrm_state *x)
+{
+	struct esp_data *esp = x->data;
+	struct crypto_aead *aead;
+	struct crypto_authenc_key_param *param;
+	struct rtattr *rta;
+	char *key;
+	char *p;
+	char authenc_name[CRYPTO_MAX_ALG_NAME];
+	unsigned int keylen;
+	int err;
+
+	err = -EINVAL;
 	if (x->ealg == NULL)
 		goto error;
 
-	esp = kzalloc(sizeof(*esp), GFP_KERNEL);
-	if (esp == NULL)
-		return -ENOMEM;
+	err = -ENAMETOOLONG;
+	if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, "authenc(%s,%s)",
+		     x->aalg ? x->aalg->alg_name : "digest_null",
+		     x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME)
+		goto error;
+
+	aead = crypto_alloc_aead(authenc_name, 0, 0);
+	err = PTR_ERR(aead);
+	if (IS_ERR(aead))
+		goto error;
+
+	esp->aead = aead;
+
+	keylen = (x->aalg ? (x->aalg->alg_key_len + 7) / 8 : 0) +
+		 (x->ealg->alg_key_len + 7) / 8 + RTA_SPACE(sizeof(*param));
+	err = -ENOMEM;
+	key = kmalloc(keylen, GFP_KERNEL);
+	if (!key)
+		goto error;
+
+	p = key;
+	rta = (void *)p;
+	rta->rta_type = CRYPTO_AUTHENC_KEYA_PARAM;
+	rta->rta_len = RTA_LENGTH(sizeof(*param));
+	param = RTA_DATA(rta);
+	p += RTA_SPACE(sizeof(*param));
 
 	if (x->aalg) {
 		struct xfrm_algo_desc *aalg_desc;
-		struct crypto_hash *hash;
 
-		hash = crypto_alloc_hash(x->aalg->alg_name, 0,
-					 CRYPTO_ALG_ASYNC);
-		if (IS_ERR(hash))
-			goto error;
-
-		esp->auth.tfm = hash;
-		if (crypto_hash_setkey(hash, x->aalg->alg_key,
-				       (x->aalg->alg_key_len + 7) / 8))
-			goto error;
+		memcpy(p, x->aalg->alg_key, (x->aalg->alg_key_len + 7) / 8);
+		p += (x->aalg->alg_key_len + 7) / 8;
 
 		aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
 		BUG_ON(!aalg_desc);
 
+		err = -EINVAL;
 		if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
-		    crypto_hash_digestsize(hash)) {
+		    crypto_aead_authsize(aead)) {
 			NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n",
 				 x->aalg->alg_name,
-				 crypto_hash_digestsize(hash),
+				 crypto_aead_authsize(aead),
 				 aalg_desc->uinfo.auth.icv_fullbits/8);
-			goto error;
+			goto free_key;
 		}
 
-		esp->auth.icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8;
-		esp->auth.icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8;
-
-		esp->auth.work_icv = kmalloc(esp->auth.icv_full_len, GFP_KERNEL);
-		if (!esp->auth.work_icv)
-			goto error;
+		err = crypto_aead_setauthsize(
+			aead, aalg_desc->uinfo.auth.icv_truncbits / 8);
+		if (err)
+			goto free_key;
 	}
 
-	tfm = crypto_alloc_blkcipher(x->ealg->alg_name, 0, CRYPTO_ALG_ASYNC);
-	if (IS_ERR(tfm))
-		goto error;
-	esp->conf.tfm = tfm;
-	esp->conf.ivlen = crypto_blkcipher_ivsize(tfm);
-	esp->conf.padlen = 0;
-	if (esp->conf.ivlen) {
-		esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL);
-		if (unlikely(esp->conf.ivec == NULL))
-			goto error;
-		esp->conf.ivinitted = 0;
-	}
-	if (crypto_blkcipher_setkey(tfm, x->ealg->alg_key,
-				    (x->ealg->alg_key_len + 7) / 8))
+	param->enckeylen = cpu_to_be32((x->ealg->alg_key_len + 7) / 8);
+	memcpy(p, x->ealg->alg_key, (x->ealg->alg_key_len + 7) / 8);
+
+	err = crypto_aead_setkey(aead, key, keylen);
+
+free_key:
+	kfree(key);
+
+error:
+	return err;
+}
+
+static int esp_init_state(struct xfrm_state *x)
+{
+	struct esp_data *esp;
+	struct crypto_aead *aead;
+	u32 align;
+	int err;
+
+	esp = kzalloc(sizeof(*esp), GFP_KERNEL);
+	if (esp == NULL)
+		return -ENOMEM;
+
+	x->data = esp;
+
+	if (x->aead)
+		err = esp_init_aead(x);
+	else
+		err = esp_init_authenc(x);
+
+	if (err)
 		goto error;
-	x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen;
+
+	aead = esp->aead;
+
+	esp->padlen = 0;
+
+	x->props.header_len = sizeof(struct ip_esp_hdr) +
+			      crypto_aead_ivsize(aead);
 	if (x->props.mode == XFRM_MODE_TUNNEL)
 		x->props.header_len += sizeof(struct iphdr);
 	else if (x->props.mode == XFRM_MODE_BEET)
@@ -434,21 +590,17 @@ static int esp_init_state(struct xfrm_state *x)
 			break;
 		}
 	}
-	x->data = esp;
-	align = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
-	if (esp->conf.padlen)
-		align = max_t(u32, align, esp->conf.padlen);
-	x->props.trailer_len = align + 1 + esp->auth.icv_trunc_len;
-	return 0;
+
+	align = ALIGN(crypto_aead_blocksize(aead), 4);
+	if (esp->padlen)
+		align = max_t(u32, align, esp->padlen);
+	x->props.trailer_len = align + 1 + crypto_aead_authsize(esp->aead);
 
 error:
-	x->data = esp;
-	esp_destroy(x);
-	x->data = NULL;
-	return -EINVAL;
+	return err;
 }
 
-static struct xfrm_type esp_type =
+static const struct xfrm_type esp_type =
 {
 	.description = "ESP4",
 	.owner = THIS_MODULE,
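esp_alloc_tmp() above packs what used to be several allocations (IV state, hash working digest, optional scatterlist array) into a single kmalloc whose internal offsets are recovered by esp_tmp_iv()/esp_tmp_req()/esp_req_sg(). A self-contained userspace sketch of the same offset arithmetic, with made-up sizes standing in for the crypto_aead_* accessors:

#include <stdio.h>

#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	/* illustrative values; the real ones come from the crypto_aead handle */
	unsigned int ivsize = 8;	/* crypto_aead_ivsize() */
	unsigned int alignmask = 3;	/* crypto_aead_alignmask() */
	unsigned int ctx_align = 8;	/* crypto_tfm_ctx_alignment() */
	unsigned int reqsize = 40;	/* crypto_aead_reqsize() */
	unsigned int req_hdr = 24;	/* sizeof(struct aead_givcrypt_request) */
	unsigned int sg_size = 16;	/* sizeof(struct scatterlist) */
	unsigned int nfrags = 3;
	unsigned int len;

	/* IV first, padded so the request that follows is ctx-aligned */
	len = ivsize;
	len += alignmask & ~(ctx_align - 1);
	len = ALIGN(len, ctx_align);

	/* then the request itself ... */
	len += req_hdr + reqsize;
	len = ALIGN(len, sizeof(void *));	/* __alignof__(struct scatterlist) */

	/* ... and finally nfrags scatterlist entries */
	len += sg_size * nfrags;

	printf("one allocation of %u bytes replaces several\n", len);
	return 0;
}

Stashing the buffer in ESP_SKB_CB(skb)->tmp is what lets the asynchronous completion callbacks, esp_output_done() and esp_input_done(), free it after the AEAD request finishes.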
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index d28261826bc2..86ff2711fc95 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -808,7 +808,7 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
 		   First of all, we scan fib_info list searching
 		   for stray nexthop entries, then ignite fib_flush.
 		*/
-		if (fib_sync_down(ifa->ifa_local, NULL, 0))
+		if (fib_sync_down_addr(dev->nd_net, ifa->ifa_local))
 			fib_flush(dev->nd_net);
 	}
 }
@@ -898,7 +898,7 @@ static void nl_fib_lookup_exit(struct net *net)
 
 static void fib_disable_ip(struct net_device *dev, int force)
 {
-	if (fib_sync_down(0, dev, force))
+	if (fib_sync_down_dev(dev, force))
 		fib_flush(dev->nd_net);
 	rt_cache_flush(0);
 	arp_ifdown(dev);
@@ -975,6 +975,7 @@ static struct notifier_block fib_netdev_notifier = {
 
 static int __net_init ip_fib_net_init(struct net *net)
 {
+	int err;
 	unsigned int i;
 
 	net->ipv4.fib_table_hash = kzalloc(
@@ -985,7 +986,14 @@ static int __net_init ip_fib_net_init(struct net *net)
 	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
 		INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
 
-	return fib4_rules_init(net);
+	err = fib4_rules_init(net);
+	if (err < 0)
+		goto fail;
+	return 0;
+
+fail:
+	kfree(net->ipv4.fib_table_hash);
+	return err;
 }
 
 static void __net_exit ip_fib_net_exit(struct net *net)
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index a15b2f1b2721..76b9c684cccd 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -424,19 +424,43 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
 
 	if (fa && fa->fa_tos == tos &&
 	    fa->fa_info->fib_priority == fi->fib_priority) {
-		struct fib_alias *fa_orig;
+		struct fib_alias *fa_first, *fa_match;
 
 		err = -EEXIST;
 		if (cfg->fc_nlflags & NLM_F_EXCL)
 			goto out;
 
+		/* We have 2 goals:
+		 * 1. Find exact match for type, scope, fib_info to avoid
+		 * duplicate routes
+		 * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
+		 */
+		fa_match = NULL;
+		fa_first = fa;
+		fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
+		list_for_each_entry_continue(fa, &f->fn_alias, fa_list) {
+			if (fa->fa_tos != tos)
+				break;
+			if (fa->fa_info->fib_priority != fi->fib_priority)
+				break;
+			if (fa->fa_type == cfg->fc_type &&
+			    fa->fa_scope == cfg->fc_scope &&
+			    fa->fa_info == fi) {
+				fa_match = fa;
+				break;
+			}
+		}
+
 		if (cfg->fc_nlflags & NLM_F_REPLACE) {
 			struct fib_info *fi_drop;
 			u8 state;
 
-			if (fi->fib_treeref > 1)
+			fa = fa_first;
+			if (fa_match) {
+				if (fa == fa_match)
+					err = 0;
 				goto out;
-
+			}
 			write_lock_bh(&fib_hash_lock);
 			fi_drop = fa->fa_info;
 			fa->fa_info = fi;
@@ -459,20 +483,11 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
 		 * uses the same scope, type, and nexthop
 		 * information.
 		 */
-		fa_orig = fa;
-		fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
-		list_for_each_entry_continue(fa, &f->fn_alias, fa_list) {
-			if (fa->fa_tos != tos)
-				break;
-			if (fa->fa_info->fib_priority != fi->fib_priority)
-				break;
-			if (fa->fa_type == cfg->fc_type &&
-			    fa->fa_scope == cfg->fc_scope &&
-			    fa->fa_info == fi)
-				goto out;
-		}
+		if (fa_match)
+			goto out;
+
 		if (!(cfg->fc_nlflags & NLM_F_APPEND))
-			fa = fa_orig;
+			fa = fa_first;
 	}
 
 	err = -ENOENT;
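Both the fib_hash and fib_trie versions of this change lean on the same list idiom: stepping back one element with list_entry(fa->fa_list.prev, ...) so that list_for_each_entry_continue() starts the scan at fa itself rather than after it. A small stand-in in plain C (not the kernel macros) showing why the back-step is needed:

#include <stdio.h>

struct node { struct node *prev, *next; int val; };

/* Minimal stand-in for list_for_each_entry_continue(): it advances
 * before the first test, so stepping the cursor back one element
 * first makes the loop visit the starting element itself.
 */
#define for_each_from(pos, head) \
	for ((pos) = (pos)->next; (pos) != (head); (pos) = (pos)->next)

int main(void)
{
	struct node head = { &head, &head, 0 };
	struct node n[3] = {
		{ &head, &n[1], 1 }, { &n[0], &n[2], 2 }, { &n[1], &head, 3 },
	};
	struct node *start = &n[1], *pos;

	head.next = &n[0];
	head.prev = &n[2];

	pos = start->prev;		/* step back one ... */
	for_each_from(pos, &head)	/* ... so the scan includes 'start' */
		printf("%d\n", pos->val);	/* prints 2 then 3 */
	return 0;
}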
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c7912866d987..a13c84763d4c 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -229,6 +229,8 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
 	head = &fib_info_hash[hash];
 
 	hlist_for_each_entry(fi, node, head, fib_hash) {
+		if (fi->fib_net != nfi->fib_net)
+			continue;
 		if (fi->fib_nhs != nfi->fib_nhs)
 			continue;
 		if (nfi->fib_protocol == fi->fib_protocol &&
@@ -687,6 +689,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 	struct fib_info *fi = NULL;
 	struct fib_info *ofi;
 	int nhs = 1;
+	struct net *net = cfg->fc_nlinfo.nl_net;
 
 	/* Fast check to catch the most weird cases */
 	if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
@@ -727,6 +730,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 		goto failure;
 	fib_info_cnt++;
 
+	fi->fib_net = net;
 	fi->fib_protocol = cfg->fc_protocol;
 	fi->fib_flags = cfg->fc_flags;
 	fi->fib_priority = cfg->fc_priority;
@@ -798,8 +802,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 		if (nhs != 1 || nh->nh_gw)
 			goto err_inval;
 		nh->nh_scope = RT_SCOPE_NOWHERE;
-		nh->nh_dev = dev_get_by_index(cfg->fc_nlinfo.nl_net,
-					      fi->fib_nh->nh_oif);
+		nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif);
 		err = -ENODEV;
 		if (nh->nh_dev == NULL)
 			goto failure;
@@ -813,8 +816,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 	if (fi->fib_prefsrc) {
 		if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
 		    fi->fib_prefsrc != cfg->fc_dst)
-			if (inet_addr_type(cfg->fc_nlinfo.nl_net,
-					   fi->fib_prefsrc) != RTN_LOCAL)
+			if (inet_addr_type(net, fi->fib_prefsrc) != RTN_LOCAL)
 				goto err_inval;
 	}
 
@@ -1031,70 +1033,74 @@ nla_put_failure:
      referring to it.
    - device went down -> we must shutdown all nexthops going via it.
  */
-
-int fib_sync_down(__be32 local, struct net_device *dev, int force)
+int fib_sync_down_addr(struct net *net, __be32 local)
 {
 	int ret = 0;
-	int scope = RT_SCOPE_NOWHERE;
+	unsigned int hash = fib_laddr_hashfn(local);
+	struct hlist_head *head = &fib_info_laddrhash[hash];
+	struct hlist_node *node;
+	struct fib_info *fi;
 
-	if (force)
-		scope = -1;
+	if (fib_info_laddrhash == NULL || local == 0)
+		return 0;
 
-	if (local && fib_info_laddrhash) {
-		unsigned int hash = fib_laddr_hashfn(local);
-		struct hlist_head *head = &fib_info_laddrhash[hash];
-		struct hlist_node *node;
-		struct fib_info *fi;
-
-		hlist_for_each_entry(fi, node, head, fib_lhash) {
-			if (fi->fib_prefsrc == local) {
-				fi->fib_flags |= RTNH_F_DEAD;
-				ret++;
-			}
+	hlist_for_each_entry(fi, node, head, fib_lhash) {
+		if (fi->fib_net != net)
+			continue;
+		if (fi->fib_prefsrc == local) {
+			fi->fib_flags |= RTNH_F_DEAD;
+			ret++;
 		}
 	}
+	return ret;
+}
 
-	if (dev) {
-		struct fib_info *prev_fi = NULL;
-		unsigned int hash = fib_devindex_hashfn(dev->ifindex);
-		struct hlist_head *head = &fib_info_devhash[hash];
-		struct hlist_node *node;
-		struct fib_nh *nh;
+int fib_sync_down_dev(struct net_device *dev, int force)
+{
+	int ret = 0;
+	int scope = RT_SCOPE_NOWHERE;
+	struct fib_info *prev_fi = NULL;
+	unsigned int hash = fib_devindex_hashfn(dev->ifindex);
+	struct hlist_head *head = &fib_info_devhash[hash];
+	struct hlist_node *node;
+	struct fib_nh *nh;
 
-		hlist_for_each_entry(nh, node, head, nh_hash) {
-			struct fib_info *fi = nh->nh_parent;
-			int dead;
+	if (force)
+		scope = -1;
 
-			BUG_ON(!fi->fib_nhs);
-			if (nh->nh_dev != dev || fi == prev_fi)
-				continue;
-			prev_fi = fi;
-			dead = 0;
-			change_nexthops(fi) {
-				if (nh->nh_flags&RTNH_F_DEAD)
-					dead++;
-				else if (nh->nh_dev == dev &&
-					 nh->nh_scope != scope) {
-					nh->nh_flags |= RTNH_F_DEAD;
+	hlist_for_each_entry(nh, node, head, nh_hash) {
+		struct fib_info *fi = nh->nh_parent;
+		int dead;
+
+		BUG_ON(!fi->fib_nhs);
+		if (nh->nh_dev != dev || fi == prev_fi)
+			continue;
+		prev_fi = fi;
+		dead = 0;
+		change_nexthops(fi) {
+			if (nh->nh_flags&RTNH_F_DEAD)
+				dead++;
+			else if (nh->nh_dev == dev &&
+				 nh->nh_scope != scope) {
+				nh->nh_flags |= RTNH_F_DEAD;
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 				spin_lock_bh(&fib_multipath_lock);
 				fi->fib_power -= nh->nh_power;
 				nh->nh_power = 0;
 				spin_unlock_bh(&fib_multipath_lock);
 #endif
-				dead++;
-			}
+				dead++;
+			}
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-			if (force > 1 && nh->nh_dev == dev) {
-				dead = fi->fib_nhs;
-				break;
-			}
-#endif
-		} endfor_nexthops(fi)
-		if (dead == fi->fib_nhs) {
-			fi->fib_flags |= RTNH_F_DEAD;
-			ret++;
+			if (force > 1 && nh->nh_dev == dev) {
+				dead = fi->fib_nhs;
+				break;
 			}
+#endif
+		} endfor_nexthops(fi)
+		if (dead == fi->fib_nhs) {
+			fi->fib_flags |= RTNH_F_DEAD;
+			ret++;
 		}
 	}
 
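The split replaces one overloaded entry point with two single-purpose ones, which is also visible at the call sites in fib_frontend.c above. The before/after contract, with the signatures taken from this patch and the bodies elided:

/* before: one function, two unrelated modes selected by its arguments */
int fib_sync_down(__be32 local, struct net_device *dev, int force);

/* after: each caller states what actually went away */
int fib_sync_down_addr(struct net *net, __be32 local);		/* local address gone */
int fib_sync_down_dev(struct net_device *dev, int force);	/* device gone */

Besides readability, the address variant can now take the namespace explicitly, matching the new fi->fib_net check in fib_find_info().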
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index f2f47033f31f..35851c96bdfb 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1205,20 +1205,45 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
 	 * and we need to allocate a new one of those as well.
 	 */
 
-	if (fa && fa->fa_info->fib_priority == fi->fib_priority) {
-		struct fib_alias *fa_orig;
+	if (fa && fa->fa_tos == tos &&
+	    fa->fa_info->fib_priority == fi->fib_priority) {
+		struct fib_alias *fa_first, *fa_match;
 
 		err = -EEXIST;
 		if (cfg->fc_nlflags & NLM_F_EXCL)
 			goto out;
 
+		/* We have 2 goals:
+		 * 1. Find exact match for type, scope, fib_info to avoid
+		 * duplicate routes
+		 * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
+		 */
+		fa_match = NULL;
+		fa_first = fa;
+		fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
+		list_for_each_entry_continue(fa, fa_head, fa_list) {
+			if (fa->fa_tos != tos)
+				break;
+			if (fa->fa_info->fib_priority != fi->fib_priority)
+				break;
+			if (fa->fa_type == cfg->fc_type &&
+			    fa->fa_scope == cfg->fc_scope &&
+			    fa->fa_info == fi) {
+				fa_match = fa;
+				break;
+			}
+		}
+
 		if (cfg->fc_nlflags & NLM_F_REPLACE) {
 			struct fib_info *fi_drop;
 			u8 state;
 
-			if (fi->fib_treeref > 1)
+			fa = fa_first;
+			if (fa_match) {
+				if (fa == fa_match)
+					err = 0;
 				goto out;
-
+			}
 			err = -ENOBUFS;
 			new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
 			if (new_fa == NULL)
@@ -1230,7 +1255,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
 			new_fa->fa_type = cfg->fc_type;
 			new_fa->fa_scope = cfg->fc_scope;
 			state = fa->fa_state;
-			new_fa->fa_state &= ~FA_S_ACCESSED;
+			new_fa->fa_state = state & ~FA_S_ACCESSED;
 
 			list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
 			alias_free_mem_rcu(fa);
@@ -1247,20 +1272,11 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
 		 * uses the same scope, type, and nexthop
 		 * information.
 		 */
-		fa_orig = fa;
-		list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) {
-			if (fa->fa_tos != tos)
-				break;
-			if (fa->fa_info->fib_priority != fi->fib_priority)
-				break;
-			if (fa->fa_type == cfg->fc_type &&
-			    fa->fa_scope == cfg->fc_scope &&
-			    fa->fa_info == fi)
-				goto out;
-		}
+		if (fa_match)
+			goto out;
 
 		if (!(cfg->fc_nlflags & NLM_F_APPEND))
-			fa = fa_orig;
+			fa = fa_first;
 	}
 	err = -ENOENT;
 	if (!(cfg->fc_nlflags & NLM_F_CREATE))
@@ -1600,9 +1616,8 @@ static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
 	pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
 
 	fa_to_delete = NULL;
-	fa_head = fa->fa_list.prev;
-
-	list_for_each_entry(fa, fa_head, fa_list) {
+	fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
+	list_for_each_entry_continue(fa, fa_head, fa_list) {
 		struct fib_info *fi = fa->fa_info;
 
 		if (fa->fa_tos != tos)
@@ -1743,6 +1758,19 @@ static struct leaf *trie_nextleaf(struct leaf *l)
 	return leaf_walk_rcu(p, c);
 }
 
+static struct leaf *trie_leafindex(struct trie *t, int index)
+{
+	struct leaf *l = trie_firstleaf(t);
+
+	while (index-- > 0) {
+		l = trie_nextleaf(l);
+		if (!l)
+			break;
+	}
+	return l;
+}
+
+
 /*
  * Caller must hold RTNL.
  */
@@ -1848,7 +1876,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
 	struct fib_alias *fa;
 	__be32 xkey = htonl(key);
 
-	s_i = cb->args[4];
+	s_i = cb->args[5];
 	i = 0;
 
 	/* rcu_read_lock is hold by caller */
@@ -1869,12 +1897,12 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
 				  plen,
 				  fa->fa_tos,
 				  fa->fa_info, NLM_F_MULTI) < 0) {
-			cb->args[4] = i;
+			cb->args[5] = i;
 			return -1;
 		}
 		i++;
 	}
-	cb->args[4] = i;
+	cb->args[5] = i;
 	return skb->len;
 }
 
@@ -1885,7 +1913,7 @@ static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb,
 	struct hlist_node *node;
 	int i, s_i;
 
-	s_i = cb->args[3];
+	s_i = cb->args[4];
 	i = 0;
 
 	/* rcu_read_lock is hold by caller */
@@ -1896,19 +1924,19 @@ static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb,
 		}
 
 		if (i > s_i)
-			cb->args[4] = 0;
+			cb->args[5] = 0;
 
 		if (list_empty(&li->falh))
 			continue;
 
 		if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) {
-			cb->args[3] = i;
+			cb->args[4] = i;
 			return -1;
 		}
 		i++;
 	}
 
-	cb->args[3] = i;
+	cb->args[4] = i;
 	return skb->len;
 }
 
@@ -1918,35 +1946,37 @@ static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb,
 	struct leaf *l;
 	struct trie *t = (struct trie *) tb->tb_data;
 	t_key key = cb->args[2];
+	int count = cb->args[3];
 
 	rcu_read_lock();
 	/* Dump starting at last key.
 	 * Note: 0.0.0.0/0 (ie default) is first key.
 	 */
-	if (!key)
+	if (count == 0)
 		l = trie_firstleaf(t);
 	else {
+		/* Normally, continue from last key, but if that is missing
+		 * fallback to using slow rescan
+		 */
 		l = fib_find_node(t, key);
-		if (!l) {
-			/* The table changed during the dump, rather than
-			 * giving partial data, just make application retry.
-			 */
-			rcu_read_unlock();
-			return -EBUSY;
-		}
+		if (!l)
+			l = trie_leafindex(t, count);
 	}
 
 	while (l) {
 		cb->args[2] = l->key;
 		if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
+			cb->args[3] = count;
 			rcu_read_unlock();
 			return -1;
 		}
 
+		++count;
 		l = trie_nextleaf(l);
-		memset(&cb->args[3], 0,
-		       sizeof(cb->args) - 3*sizeof(cb->args[0]));
+		memset(&cb->args[4], 0,
+		       sizeof(cb->args) - 4*sizeof(cb->args[0]));
 	}
+	cb->args[3] = count;
 	rcu_read_unlock();
 
 	return skb->len;
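The fn_trie_dump change swaps the old all-or-nothing behaviour (return -EBUSY if the resume key has vanished) for a resume strategy: cb->args[2] remembers the last dumped leaf key and cb->args[3] a running leaf count, and trie_leafindex() skips ahead by count when the key can no longer be found. A toy sketch of the same idea over a sorted array (the names and the table stand-in are illustrative, not kernel code):

#include <stdio.h>

/* Toy table: sorted keys stand in for trie leaves. */
static int keys[] = { 10, 20, 30, 40, 50 };
static int nkeys = 5;

static int find_index(int key)
{
	for (int i = 0; i < nkeys; i++)
		if (keys[i] == key)
			return i;
	return -1;		/* key was deleted between dump rounds */
}

/* Resume a paged dump: prefer the exact key, otherwise fall back to
 * the running count, mirroring fib_find_node() + trie_leafindex().
 */
static int resume_pos(int last_key, int count)
{
	int i;

	if (count == 0)
		return 0;
	i = find_index(last_key);
	return i >= 0 ? i : count;
}

int main(void)
{
	/* first round dumped keys[0..1]; key 20 then got replaced */
	keys[1] = 25;		/* table changed under us */
	printf("resume at index %d\n", resume_pos(20, 2));	/* -> 2 */
	return 0;
}

The dump may still miss or repeat an entry that changed concurrently, but it no longer forces the application to restart from scratch.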
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 7801cceb2d1b..de5a41de191a 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
@@ -87,6 +87,7 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo, | |||
87 | struct hlist_node *node; | 87 | struct hlist_node *node; |
88 | struct inet_bind_bucket *tb; | 88 | struct inet_bind_bucket *tb; |
89 | int ret; | 89 | int ret; |
90 | struct net *net = sk->sk_net; | ||
90 | 91 | ||
91 | local_bh_disable(); | 92 | local_bh_disable(); |
92 | if (!snum) { | 93 | if (!snum) { |
@@ -100,7 +101,7 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo, | |||
100 | head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; | 101 | head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; |
101 | spin_lock(&head->lock); | 102 | spin_lock(&head->lock); |
102 | inet_bind_bucket_for_each(tb, node, &head->chain) | 103 | inet_bind_bucket_for_each(tb, node, &head->chain) |
103 | if (tb->port == rover) | 104 | if (tb->ib_net == net && tb->port == rover) |
104 | goto next; | 105 | goto next; |
105 | break; | 106 | break; |
106 | next: | 107 | next: |
@@ -127,7 +128,7 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo, | |||
127 | head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)]; | 128 | head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)]; |
128 | spin_lock(&head->lock); | 129 | spin_lock(&head->lock); |
129 | inet_bind_bucket_for_each(tb, node, &head->chain) | 130 | inet_bind_bucket_for_each(tb, node, &head->chain) |
130 | if (tb->port == snum) | 131 | if (tb->ib_net == net && tb->port == snum) |
131 | goto tb_found; | 132 | goto tb_found; |
132 | } | 133 | } |
133 | tb = NULL; | 134 | tb = NULL; |
@@ -147,7 +148,8 @@ tb_found: | |||
147 | } | 148 | } |
148 | tb_not_found: | 149 | tb_not_found: |
149 | ret = 1; | 150 | ret = 1; |
150 | if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL) | 151 | if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, |
152 | net, head, snum)) == NULL) | ||
151 | goto fail_unlock; | 153 | goto fail_unlock; |
152 | if (hlist_empty(&tb->owners)) { | 154 | if (hlist_empty(&tb->owners)) { |
153 | if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) | 155 | if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) |
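The inet_csk_get_port() hunks make bind buckets namespace-aware: a bucket only matches when both the owning struct net and the port agree, so two namespaces can bind the same port independently. A hedged sketch of the test both loops now apply (the helper name is ours, not the kernel's):

static inline int bind_bucket_match(const struct inet_bind_bucket *tb,
                                    const struct net *net,
                                    unsigned short port)
{
        /* the same port in another namespace is not a conflict */
        return tb->ib_net == net && tb->port == port;
}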
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 605ed2cd7972..da97695e7096 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c | |||
@@ -259,20 +259,22 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, | |||
259 | const struct inet_diag_handler *handler; | 259 | const struct inet_diag_handler *handler; |
260 | 260 | ||
261 | handler = inet_diag_lock_handler(nlh->nlmsg_type); | 261 | handler = inet_diag_lock_handler(nlh->nlmsg_type); |
262 | if (!handler) | 262 | if (IS_ERR(handler)) { |
263 | return -ENOENT; | 263 | err = PTR_ERR(handler); |
264 | goto unlock; | ||
265 | } | ||
264 | 266 | ||
265 | hashinfo = handler->idiag_hashinfo; | 267 | hashinfo = handler->idiag_hashinfo; |
266 | err = -EINVAL; | 268 | err = -EINVAL; |
267 | 269 | ||
268 | if (req->idiag_family == AF_INET) { | 270 | if (req->idiag_family == AF_INET) { |
269 | sk = inet_lookup(hashinfo, req->id.idiag_dst[0], | 271 | sk = inet_lookup(&init_net, hashinfo, req->id.idiag_dst[0], |
270 | req->id.idiag_dport, req->id.idiag_src[0], | 272 | req->id.idiag_dport, req->id.idiag_src[0], |
271 | req->id.idiag_sport, req->id.idiag_if); | 273 | req->id.idiag_sport, req->id.idiag_if); |
272 | } | 274 | } |
273 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) | 275 | #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) |
274 | else if (req->idiag_family == AF_INET6) { | 276 | else if (req->idiag_family == AF_INET6) { |
275 | sk = inet6_lookup(hashinfo, | 277 | sk = inet6_lookup(&init_net, hashinfo, |
276 | (struct in6_addr *)req->id.idiag_dst, | 278 | (struct in6_addr *)req->id.idiag_dst, |
277 | req->id.idiag_dport, | 279 | req->id.idiag_dport, |
278 | (struct in6_addr *)req->id.idiag_src, | 280 | (struct in6_addr *)req->id.idiag_src, |
@@ -708,8 +710,8 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
708 | struct inet_hashinfo *hashinfo; | 710 | struct inet_hashinfo *hashinfo; |
709 | 711 | ||
710 | handler = inet_diag_lock_handler(cb->nlh->nlmsg_type); | 712 | handler = inet_diag_lock_handler(cb->nlh->nlmsg_type); |
711 | if (!handler) | 713 | if (IS_ERR(handler)) |
712 | goto no_handler; | 714 | goto unlock; |
713 | 715 | ||
714 | hashinfo = handler->idiag_hashinfo; | 716 | hashinfo = handler->idiag_hashinfo; |
715 | 717 | ||
@@ -838,7 +840,6 @@ done: | |||
838 | cb->args[2] = num; | 840 | cb->args[2] = num; |
839 | unlock: | 841 | unlock: |
840 | inet_diag_unlock_handler(handler); | 842 | inet_diag_unlock_handler(handler); |
841 | no_handler: | ||
842 | return skb->len; | 843 | return skb->len; |
843 | } | 844 | } |
844 | 845 | ||
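inet_diag_lock_handler() now reports failure in the pointer itself instead of returning NULL, which lets both callers funnel every exit through the shared unlock label. This is the kernel's usual ERR_PTR convention; a self-contained model of it, matching <linux/err.h> of this era:

#define MAX_ERRNO       4095

static inline void *ERR_PTR(long error)        /* encode -errno as a pointer */
{
        return (void *)error;
}

static inline long PTR_ERR(const void *ptr)    /* decode it again */
{
        return (long)ptr;
}

static inline long IS_ERR(const void *ptr)     /* top MAX_ERRNO addresses */
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}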
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 619c63c6948a..48d45008f749 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c | |||
@@ -28,12 +28,14 @@ | |||
28 | * The bindhash mutex for snum's hash chain must be held here. | 28 | * The bindhash mutex for snum's hash chain must be held here. |
29 | */ | 29 | */ |
30 | struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, | 30 | struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, |
31 | struct net *net, | ||
31 | struct inet_bind_hashbucket *head, | 32 | struct inet_bind_hashbucket *head, |
32 | const unsigned short snum) | 33 | const unsigned short snum) |
33 | { | 34 | { |
34 | struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); | 35 | struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); |
35 | 36 | ||
36 | if (tb != NULL) { | 37 | if (tb != NULL) { |
38 | tb->ib_net = net; | ||
37 | tb->port = snum; | 39 | tb->port = snum; |
38 | tb->fastreuse = 0; | 40 | tb->fastreuse = 0; |
39 | INIT_HLIST_HEAD(&tb->owners); | 41 | INIT_HLIST_HEAD(&tb->owners); |
@@ -125,7 +127,8 @@ EXPORT_SYMBOL(inet_listen_wlock); | |||
125 | * remote address for the connection. So always assume those are both | 127 | * remote address for the connection. So always assume those are both |
126 | * wildcarded during the search since they can never be otherwise. | 128 | * wildcarded during the search since they can never be otherwise. |
127 | */ | 129 | */ |
128 | static struct sock *inet_lookup_listener_slow(const struct hlist_head *head, | 130 | static struct sock *inet_lookup_listener_slow(struct net *net, |
131 | const struct hlist_head *head, | ||
129 | const __be32 daddr, | 132 | const __be32 daddr, |
130 | const unsigned short hnum, | 133 | const unsigned short hnum, |
131 | const int dif) | 134 | const int dif) |
@@ -137,7 +140,8 @@ static struct sock *inet_lookup_listener_slow(const struct hlist_head *head, | |||
137 | sk_for_each(sk, node, head) { | 140 | sk_for_each(sk, node, head) { |
138 | const struct inet_sock *inet = inet_sk(sk); | 141 | const struct inet_sock *inet = inet_sk(sk); |
139 | 142 | ||
140 | if (inet->num == hnum && !ipv6_only_sock(sk)) { | 143 | if (sk->sk_net == net && inet->num == hnum && |
144 | !ipv6_only_sock(sk)) { | ||
141 | const __be32 rcv_saddr = inet->rcv_saddr; | 145 | const __be32 rcv_saddr = inet->rcv_saddr; |
142 | int score = sk->sk_family == PF_INET ? 1 : 0; | 146 | int score = sk->sk_family == PF_INET ? 1 : 0; |
143 | 147 | ||
@@ -163,7 +167,8 @@ static struct sock *inet_lookup_listener_slow(const struct hlist_head *head, | |||
163 | } | 167 | } |
164 | 168 | ||
165 | /* Optimize the common listener case. */ | 169 | /* Optimize the common listener case. */ |
166 | struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo, | 170 | struct sock *__inet_lookup_listener(struct net *net, |
171 | struct inet_hashinfo *hashinfo, | ||
167 | const __be32 daddr, const unsigned short hnum, | 172 | const __be32 daddr, const unsigned short hnum, |
168 | const int dif) | 173 | const int dif) |
169 | { | 174 | { |
@@ -178,9 +183,9 @@ struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo, | |||
178 | if (inet->num == hnum && !sk->sk_node.next && | 183 | if (inet->num == hnum && !sk->sk_node.next && |
179 | (!inet->rcv_saddr || inet->rcv_saddr == daddr) && | 184 | (!inet->rcv_saddr || inet->rcv_saddr == daddr) && |
180 | (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && | 185 | (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && |
181 | !sk->sk_bound_dev_if) | 186 | !sk->sk_bound_dev_if && sk->sk_net == net) |
182 | goto sherry_cache; | 187 | goto sherry_cache; |
183 | sk = inet_lookup_listener_slow(head, daddr, hnum, dif); | 188 | sk = inet_lookup_listener_slow(net, head, daddr, hnum, dif); |
184 | } | 189 | } |
185 | if (sk) { | 190 | if (sk) { |
186 | sherry_cache: | 191 | sherry_cache: |
@@ -191,7 +196,8 @@ sherry_cache: | |||
191 | } | 196 | } |
192 | EXPORT_SYMBOL_GPL(__inet_lookup_listener); | 197 | EXPORT_SYMBOL_GPL(__inet_lookup_listener); |
193 | 198 | ||
194 | struct sock * __inet_lookup_established(struct inet_hashinfo *hashinfo, | 199 | struct sock * __inet_lookup_established(struct net *net, |
200 | struct inet_hashinfo *hashinfo, | ||
195 | const __be32 saddr, const __be16 sport, | 201 | const __be32 saddr, const __be16 sport, |
196 | const __be32 daddr, const u16 hnum, | 202 | const __be32 daddr, const u16 hnum, |
197 | const int dif) | 203 | const int dif) |
@@ -210,13 +216,15 @@ struct sock * __inet_lookup_established(struct inet_hashinfo *hashinfo, | |||
210 | prefetch(head->chain.first); | 216 | prefetch(head->chain.first); |
211 | read_lock(lock); | 217 | read_lock(lock); |
212 | sk_for_each(sk, node, &head->chain) { | 218 | sk_for_each(sk, node, &head->chain) { |
213 | if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif)) | 219 | if (INET_MATCH(sk, net, hash, acookie, |
220 | saddr, daddr, ports, dif)) | ||
214 | goto hit; /* You sunk my battleship! */ | 221 | goto hit; /* You sunk my battleship! */ |
215 | } | 222 | } |
216 | 223 | ||
217 | /* Must check for a TIME_WAIT'er before going to listener hash. */ | 224 | /* Must check for a TIME_WAIT'er before going to listener hash. */ |
218 | sk_for_each(sk, node, &head->twchain) { | 225 | sk_for_each(sk, node, &head->twchain) { |
219 | if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif)) | 226 | if (INET_TW_MATCH(sk, net, hash, acookie, |
227 | saddr, daddr, ports, dif)) | ||
220 | goto hit; | 228 | goto hit; |
221 | } | 229 | } |
222 | sk = NULL; | 230 | sk = NULL; |
@@ -247,6 +255,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, | |||
247 | struct sock *sk2; | 255 | struct sock *sk2; |
248 | const struct hlist_node *node; | 256 | const struct hlist_node *node; |
249 | struct inet_timewait_sock *tw; | 257 | struct inet_timewait_sock *tw; |
258 | struct net *net = sk->sk_net; | ||
250 | 259 | ||
251 | prefetch(head->chain.first); | 260 | prefetch(head->chain.first); |
252 | write_lock(lock); | 261 | write_lock(lock); |
@@ -255,7 +264,8 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, | |||
255 | sk_for_each(sk2, node, &head->twchain) { | 264 | sk_for_each(sk2, node, &head->twchain) { |
256 | tw = inet_twsk(sk2); | 265 | tw = inet_twsk(sk2); |
257 | 266 | ||
258 | if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) { | 267 | if (INET_TW_MATCH(sk2, net, hash, acookie, |
268 | saddr, daddr, ports, dif)) { | ||
259 | if (twsk_unique(sk, sk2, twp)) | 269 | if (twsk_unique(sk, sk2, twp)) |
260 | goto unique; | 270 | goto unique; |
261 | else | 271 | else |
@@ -266,7 +276,8 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, | |||
266 | 276 | ||
267 | /* And established part... */ | 277 | /* And established part... */ |
268 | sk_for_each(sk2, node, &head->chain) { | 278 | sk_for_each(sk2, node, &head->chain) { |
269 | if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) | 279 | if (INET_MATCH(sk2, net, hash, acookie, |
280 | saddr, daddr, ports, dif)) | ||
270 | goto not_unique; | 281 | goto not_unique; |
271 | } | 282 | } |
272 | 283 | ||
@@ -348,17 +359,18 @@ void __inet_hash(struct inet_hashinfo *hashinfo, struct sock *sk) | |||
348 | } | 359 | } |
349 | EXPORT_SYMBOL_GPL(__inet_hash); | 360 | EXPORT_SYMBOL_GPL(__inet_hash); |
350 | 361 | ||
351 | /* | 362 | int __inet_hash_connect(struct inet_timewait_death_row *death_row, |
352 | * Bind a port for a connect operation and hash it. | 363 | struct sock *sk, |
353 | */ | 364 | int (*check_established)(struct inet_timewait_death_row *, |
354 | int inet_hash_connect(struct inet_timewait_death_row *death_row, | 365 | struct sock *, __u16, struct inet_timewait_sock **), |
355 | struct sock *sk) | 366 | void (*hash)(struct inet_hashinfo *, struct sock *)) |
356 | { | 367 | { |
357 | struct inet_hashinfo *hinfo = death_row->hashinfo; | 368 | struct inet_hashinfo *hinfo = death_row->hashinfo; |
358 | const unsigned short snum = inet_sk(sk)->num; | 369 | const unsigned short snum = inet_sk(sk)->num; |
359 | struct inet_bind_hashbucket *head; | 370 | struct inet_bind_hashbucket *head; |
360 | struct inet_bind_bucket *tb; | 371 | struct inet_bind_bucket *tb; |
361 | int ret; | 372 | int ret; |
373 | struct net *net = sk->sk_net; | ||
362 | 374 | ||
363 | if (!snum) { | 375 | if (!snum) { |
364 | int i, remaining, low, high, port; | 376 | int i, remaining, low, high, port; |
@@ -381,19 +393,19 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row, | |||
381 | * unique enough. | 393 | * unique enough. |
382 | */ | 394 | */ |
383 | inet_bind_bucket_for_each(tb, node, &head->chain) { | 395 | inet_bind_bucket_for_each(tb, node, &head->chain) { |
384 | if (tb->port == port) { | 396 | if (tb->ib_net == net && tb->port == port) { |
385 | BUG_TRAP(!hlist_empty(&tb->owners)); | 397 | BUG_TRAP(!hlist_empty(&tb->owners)); |
386 | if (tb->fastreuse >= 0) | 398 | if (tb->fastreuse >= 0) |
387 | goto next_port; | 399 | goto next_port; |
388 | if (!__inet_check_established(death_row, | 400 | if (!check_established(death_row, sk, |
389 | sk, port, | 401 | port, &tw)) |
390 | &tw)) | ||
391 | goto ok; | 402 | goto ok; |
392 | goto next_port; | 403 | goto next_port; |
393 | } | 404 | } |
394 | } | 405 | } |
395 | 406 | ||
396 | tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, port); | 407 | tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, |
408 | net, head, port); | ||
397 | if (!tb) { | 409 | if (!tb) { |
398 | spin_unlock(&head->lock); | 410 | spin_unlock(&head->lock); |
399 | break; | 411 | break; |
@@ -415,7 +427,7 @@ ok: | |||
415 | inet_bind_hash(sk, tb, port); | 427 | inet_bind_hash(sk, tb, port); |
416 | if (sk_unhashed(sk)) { | 428 | if (sk_unhashed(sk)) { |
417 | inet_sk(sk)->sport = htons(port); | 429 | inet_sk(sk)->sport = htons(port); |
418 | __inet_hash_nolisten(hinfo, sk); | 430 | hash(hinfo, sk); |
419 | } | 431 | } |
420 | spin_unlock(&head->lock); | 432 | spin_unlock(&head->lock); |
421 | 433 | ||
@@ -432,17 +444,28 @@ ok: | |||
432 | tb = inet_csk(sk)->icsk_bind_hash; | 444 | tb = inet_csk(sk)->icsk_bind_hash; |
433 | spin_lock_bh(&head->lock); | 445 | spin_lock_bh(&head->lock); |
434 | if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { | 446 | if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { |
435 | __inet_hash_nolisten(hinfo, sk); | 447 | hash(hinfo, sk); |
436 | spin_unlock_bh(&head->lock); | 448 | spin_unlock_bh(&head->lock); |
437 | return 0; | 449 | return 0; |
438 | } else { | 450 | } else { |
439 | spin_unlock(&head->lock); | 451 | spin_unlock(&head->lock); |
440 | /* No definite answer... Walk to established hash table */ | 452 | /* No definite answer... Walk to established hash table */ |
441 | ret = __inet_check_established(death_row, sk, snum, NULL); | 453 | ret = check_established(death_row, sk, snum, NULL); |
442 | out: | 454 | out: |
443 | local_bh_enable(); | 455 | local_bh_enable(); |
444 | return ret; | 456 | return ret; |
445 | } | 457 | } |
446 | } | 458 | } |
459 | EXPORT_SYMBOL_GPL(__inet_hash_connect); | ||
460 | |||
461 | /* | ||
462 | * Bind a port for a connect operation and hash it. | ||
463 | */ | ||
464 | int inet_hash_connect(struct inet_timewait_death_row *death_row, | ||
465 | struct sock *sk) | ||
466 | { | ||
467 | return __inet_hash_connect(death_row, sk, | ||
468 | __inet_check_established, __inet_hash_nolisten); | ||
469 | } | ||
447 | 470 | ||
448 | EXPORT_SYMBOL_GPL(inet_hash_connect); | 471 | EXPORT_SYMBOL_GPL(inet_hash_connect); |
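The refactoring above splits the ephemeral-port search out of inet_hash_connect() into __inet_hash_connect(), parameterized by the uniqueness check and the hash-insert function, so other families can reuse the whole loop. A hypothetical IPv6 caller would look like the following sketch (the two callback names are assumptions, not part of this diff):

int inet6_hash_connect(struct inet_timewait_death_row *death_row,
                       struct sock *sk)
{
        /* same port-search walk, IPv6-specific match and insert */
        return __inet_hash_connect(death_row, sk,
                                   __inet6_check_established,
                                   __inet6_hash);
}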
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 18070ca65771..341779e685d9 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -168,6 +168,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, | |||
168 | } | 168 | } |
169 | 169 | ||
170 | skb->priority = sk->sk_priority; | 170 | skb->priority = sk->sk_priority; |
171 | skb->mark = sk->sk_mark; | ||
171 | 172 | ||
172 | /* Send it out. */ | 173 | /* Send it out. */ |
173 | return ip_local_out(skb); | 174 | return ip_local_out(skb); |
@@ -385,6 +386,7 @@ packet_routed: | |||
385 | (skb_shinfo(skb)->gso_segs ?: 1) - 1); | 386 | (skb_shinfo(skb)->gso_segs ?: 1) - 1); |
386 | 387 | ||
387 | skb->priority = sk->sk_priority; | 388 | skb->priority = sk->sk_priority; |
389 | skb->mark = sk->sk_mark; | ||
388 | 390 | ||
389 | return ip_local_out(skb); | 391 | return ip_local_out(skb); |
390 | 392 | ||
@@ -476,6 +478,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) | |||
476 | if (skb_shinfo(skb)->frag_list) { | 478 | if (skb_shinfo(skb)->frag_list) { |
477 | struct sk_buff *frag; | 479 | struct sk_buff *frag; |
478 | int first_len = skb_pagelen(skb); | 480 | int first_len = skb_pagelen(skb); |
481 | int truesizes = 0; | ||
479 | 482 | ||
480 | if (first_len - hlen > mtu || | 483 | if (first_len - hlen > mtu || |
481 | ((first_len - hlen) & 7) || | 484 | ((first_len - hlen) & 7) || |
@@ -499,7 +502,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) | |||
499 | sock_hold(skb->sk); | 502 | sock_hold(skb->sk); |
500 | frag->sk = skb->sk; | 503 | frag->sk = skb->sk; |
501 | frag->destructor = sock_wfree; | 504 | frag->destructor = sock_wfree; |
502 | skb->truesize -= frag->truesize; | 505 | truesizes += frag->truesize; |
503 | } | 506 | } |
504 | } | 507 | } |
505 | 508 | ||
@@ -510,6 +513,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) | |||
510 | frag = skb_shinfo(skb)->frag_list; | 513 | frag = skb_shinfo(skb)->frag_list; |
511 | skb_shinfo(skb)->frag_list = NULL; | 514 | skb_shinfo(skb)->frag_list = NULL; |
512 | skb->data_len = first_len - skb_headlen(skb); | 515 | skb->data_len = first_len - skb_headlen(skb); |
516 | skb->truesize -= truesizes; | ||
513 | skb->len = first_len; | 517 | skb->len = first_len; |
514 | iph->tot_len = htons(first_len); | 518 | iph->tot_len = htons(first_len); |
515 | iph->frag_off = htons(IP_MF); | 519 | iph->frag_off = htons(IP_MF); |
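The truesizes change defers the accounting: the bytes owned by the fragment list are tallied while it is walked and subtracted from skb->truesize only once the list is actually detached, rather than decrementing inside the loop. A simplified outline of the two hunks above:

int truesizes = 0;
struct sk_buff *frag;

for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next)
        truesizes += frag->truesize;    /* tally only; skb untouched */

skb_shinfo(skb)->frag_list = NULL;      /* detach the fragment chain */
skb->truesize -= truesizes;             /* ...then adjust exactly once */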
@@ -1284,6 +1288,7 @@ int ip_push_pending_frames(struct sock *sk) | |||
1284 | iph->daddr = rt->rt_dst; | 1288 | iph->daddr = rt->rt_dst; |
1285 | 1289 | ||
1286 | skb->priority = sk->sk_priority; | 1290 | skb->priority = sk->sk_priority; |
1291 | skb->mark = sk->sk_mark; | ||
1287 | skb->dst = dst_clone(&rt->u.dst); | 1292 | skb->dst = dst_clone(&rt->u.dst); |
1288 | 1293 | ||
1289 | if (iph->protocol == IPPROTO_ICMP) | 1294 | if (iph->protocol == IPPROTO_ICMP) |
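With skb->mark copied from sk->sk_mark in ip_build_and_send_pkt(), ip_queue_xmit() and ip_push_pending_frames(), a mark set on the socket now rides on every locally generated IPv4 packet, visible to netfilter and policy routing. From userspace the mark comes from the SO_MARK socket option; a minimal sketch (assumes a Linux libc, and CAP_NET_ADMIN at run time):

#include <sys/socket.h>

#ifndef SO_MARK
#define SO_MARK 36      /* Linux value; older headers may lack it */
#endif

/* returns 0 on success, -1 with errno set on failure */
static int set_fwmark(int fd, unsigned int mark)
{
        return setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark));
}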
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index f4af99ad8fdb..ae1f45fc23b9 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c | |||
@@ -74,6 +74,7 @@ out: | |||
74 | 74 | ||
75 | static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb) | 75 | static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb) |
76 | { | 76 | { |
77 | int nexthdr; | ||
77 | int err = -ENOMEM; | 78 | int err = -ENOMEM; |
78 | struct ip_comp_hdr *ipch; | 79 | struct ip_comp_hdr *ipch; |
79 | 80 | ||
@@ -84,13 +85,15 @@ static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb) | |||
84 | 85 | ||
85 | /* Remove ipcomp header and decompress original payload */ | 86 | /* Remove ipcomp header and decompress original payload */ |
86 | ipch = (void *)skb->data; | 87 | ipch = (void *)skb->data; |
88 | nexthdr = ipch->nexthdr; | ||
89 | |||
87 | skb->transport_header = skb->network_header + sizeof(*ipch); | 90 | skb->transport_header = skb->network_header + sizeof(*ipch); |
88 | __skb_pull(skb, sizeof(*ipch)); | 91 | __skb_pull(skb, sizeof(*ipch)); |
89 | err = ipcomp_decompress(x, skb); | 92 | err = ipcomp_decompress(x, skb); |
90 | if (err) | 93 | if (err) |
91 | goto out; | 94 | goto out; |
92 | 95 | ||
93 | err = ipch->nexthdr; | 96 | err = nexthdr; |
94 | 97 | ||
95 | out: | 98 | out: |
96 | return err; | 99 | return err; |
@@ -434,7 +437,7 @@ error: | |||
434 | goto out; | 437 | goto out; |
435 | } | 438 | } |
436 | 439 | ||
437 | static struct xfrm_type ipcomp_type = { | 440 | static const struct xfrm_type ipcomp_type = { |
438 | .description = "IPCOMP4", | 441 | .description = "IPCOMP4", |
439 | .owner = THIS_MODULE, | 442 | .owner = THIS_MODULE, |
440 | .proto = IPPROTO_COMP, | 443 | .proto = IPPROTO_COMP, |
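The ipcomp_input() fix is a pointer-lifetime bug: ipch aliases skb->data, and ipcomp_decompress() may reallocate that buffer, so nexthdr must be copied out before decompressing. The general shape of the fix, with a hypothetical decompress() standing in for ipcomp_decompress():

static int consume_comp_header(struct sk_buff *skb)
{
        struct ip_comp_hdr *ipch = (void *)skb->data;
        int nexthdr = ipch->nexthdr;    /* save before skb->data can move */
        int err;

        __skb_pull(skb, sizeof(*ipch));
        err = decompress(skb);          /* may reallocate; ipch now stale */
        if (err)
                return err;
        return nexthdr;                 /* safe: copied earlier */
}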
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index b4a810c28ac8..a7591ce344d2 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/mutex.h> | 22 | #include <linux/mutex.h> |
23 | #include <linux/err.h> | 23 | #include <linux/err.h> |
24 | #include <net/compat.h> | 24 | #include <net/compat.h> |
25 | #include <net/sock.h> | ||
25 | #include <asm/uaccess.h> | 26 | #include <asm/uaccess.h> |
26 | 27 | ||
27 | #include <linux/netfilter/x_tables.h> | 28 | #include <linux/netfilter/x_tables.h> |
@@ -850,7 +851,7 @@ static int compat_table_info(const struct xt_table_info *info, | |||
850 | } | 851 | } |
851 | #endif | 852 | #endif |
852 | 853 | ||
853 | static int get_info(void __user *user, int *len, int compat) | 854 | static int get_info(struct net *net, void __user *user, int *len, int compat) |
854 | { | 855 | { |
855 | char name[ARPT_TABLE_MAXNAMELEN]; | 856 | char name[ARPT_TABLE_MAXNAMELEN]; |
856 | struct arpt_table *t; | 857 | struct arpt_table *t; |
@@ -870,7 +871,7 @@ static int get_info(void __user *user, int *len, int compat) | |||
870 | if (compat) | 871 | if (compat) |
871 | xt_compat_lock(NF_ARP); | 872 | xt_compat_lock(NF_ARP); |
872 | #endif | 873 | #endif |
873 | t = try_then_request_module(xt_find_table_lock(NF_ARP, name), | 874 | t = try_then_request_module(xt_find_table_lock(net, NF_ARP, name), |
874 | "arptable_%s", name); | 875 | "arptable_%s", name); |
875 | if (t && !IS_ERR(t)) { | 876 | if (t && !IS_ERR(t)) { |
876 | struct arpt_getinfo info; | 877 | struct arpt_getinfo info; |
@@ -908,7 +909,8 @@ static int get_info(void __user *user, int *len, int compat) | |||
908 | return ret; | 909 | return ret; |
909 | } | 910 | } |
910 | 911 | ||
911 | static int get_entries(struct arpt_get_entries __user *uptr, int *len) | 912 | static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, |
913 | int *len) | ||
912 | { | 914 | { |
913 | int ret; | 915 | int ret; |
914 | struct arpt_get_entries get; | 916 | struct arpt_get_entries get; |
@@ -926,7 +928,7 @@ static int get_entries(struct arpt_get_entries __user *uptr, int *len) | |||
926 | return -EINVAL; | 928 | return -EINVAL; |
927 | } | 929 | } |
928 | 930 | ||
929 | t = xt_find_table_lock(NF_ARP, get.name); | 931 | t = xt_find_table_lock(net, NF_ARP, get.name); |
930 | if (t && !IS_ERR(t)) { | 932 | if (t && !IS_ERR(t)) { |
931 | struct xt_table_info *private = t->private; | 933 | struct xt_table_info *private = t->private; |
932 | duprintf("t->private->number = %u\n", | 934 | duprintf("t->private->number = %u\n", |
@@ -947,7 +949,8 @@ static int get_entries(struct arpt_get_entries __user *uptr, int *len) | |||
947 | return ret; | 949 | return ret; |
948 | } | 950 | } |
949 | 951 | ||
950 | static int __do_replace(const char *name, unsigned int valid_hooks, | 952 | static int __do_replace(struct net *net, const char *name, |
953 | unsigned int valid_hooks, | ||
951 | struct xt_table_info *newinfo, | 954 | struct xt_table_info *newinfo, |
952 | unsigned int num_counters, | 955 | unsigned int num_counters, |
953 | void __user *counters_ptr) | 956 | void __user *counters_ptr) |
@@ -966,7 +969,7 @@ static int __do_replace(const char *name, unsigned int valid_hooks, | |||
966 | goto out; | 969 | goto out; |
967 | } | 970 | } |
968 | 971 | ||
969 | t = try_then_request_module(xt_find_table_lock(NF_ARP, name), | 972 | t = try_then_request_module(xt_find_table_lock(net, NF_ARP, name), |
970 | "arptable_%s", name); | 973 | "arptable_%s", name); |
971 | if (!t || IS_ERR(t)) { | 974 | if (!t || IS_ERR(t)) { |
972 | ret = t ? PTR_ERR(t) : -ENOENT; | 975 | ret = t ? PTR_ERR(t) : -ENOENT; |
@@ -1019,7 +1022,7 @@ static int __do_replace(const char *name, unsigned int valid_hooks, | |||
1019 | return ret; | 1022 | return ret; |
1020 | } | 1023 | } |
1021 | 1024 | ||
1022 | static int do_replace(void __user *user, unsigned int len) | 1025 | static int do_replace(struct net *net, void __user *user, unsigned int len) |
1023 | { | 1026 | { |
1024 | int ret; | 1027 | int ret; |
1025 | struct arpt_replace tmp; | 1028 | struct arpt_replace tmp; |
@@ -1053,7 +1056,7 @@ static int do_replace(void __user *user, unsigned int len) | |||
1053 | 1056 | ||
1054 | duprintf("arp_tables: Translated table\n"); | 1057 | duprintf("arp_tables: Translated table\n"); |
1055 | 1058 | ||
1056 | ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo, | 1059 | ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, |
1057 | tmp.num_counters, tmp.counters); | 1060 | tmp.num_counters, tmp.counters); |
1058 | if (ret) | 1061 | if (ret) |
1059 | goto free_newinfo_untrans; | 1062 | goto free_newinfo_untrans; |
@@ -1080,7 +1083,8 @@ static inline int add_counter_to_entry(struct arpt_entry *e, | |||
1080 | return 0; | 1083 | return 0; |
1081 | } | 1084 | } |
1082 | 1085 | ||
1083 | static int do_add_counters(void __user *user, unsigned int len, int compat) | 1086 | static int do_add_counters(struct net *net, void __user *user, unsigned int len, |
1087 | int compat) | ||
1084 | { | 1088 | { |
1085 | unsigned int i; | 1089 | unsigned int i; |
1086 | struct xt_counters_info tmp; | 1090 | struct xt_counters_info tmp; |
@@ -1132,7 +1136,7 @@ static int do_add_counters(void __user *user, unsigned int len, int compat) | |||
1132 | goto free; | 1136 | goto free; |
1133 | } | 1137 | } |
1134 | 1138 | ||
1135 | t = xt_find_table_lock(NF_ARP, name); | 1139 | t = xt_find_table_lock(net, NF_ARP, name); |
1136 | if (!t || IS_ERR(t)) { | 1140 | if (!t || IS_ERR(t)) { |
1137 | ret = t ? PTR_ERR(t) : -ENOENT; | 1141 | ret = t ? PTR_ERR(t) : -ENOENT; |
1138 | goto free; | 1142 | goto free; |
@@ -1435,7 +1439,8 @@ struct compat_arpt_replace { | |||
1435 | struct compat_arpt_entry entries[0]; | 1439 | struct compat_arpt_entry entries[0]; |
1436 | }; | 1440 | }; |
1437 | 1441 | ||
1438 | static int compat_do_replace(void __user *user, unsigned int len) | 1442 | static int compat_do_replace(struct net *net, void __user *user, |
1443 | unsigned int len) | ||
1439 | { | 1444 | { |
1440 | int ret; | 1445 | int ret; |
1441 | struct compat_arpt_replace tmp; | 1446 | struct compat_arpt_replace tmp; |
@@ -1471,7 +1476,7 @@ static int compat_do_replace(void __user *user, unsigned int len) | |||
1471 | 1476 | ||
1472 | duprintf("compat_do_replace: Translated table\n"); | 1477 | duprintf("compat_do_replace: Translated table\n"); |
1473 | 1478 | ||
1474 | ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo, | 1479 | ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, |
1475 | tmp.num_counters, compat_ptr(tmp.counters)); | 1480 | tmp.num_counters, compat_ptr(tmp.counters)); |
1476 | if (ret) | 1481 | if (ret) |
1477 | goto free_newinfo_untrans; | 1482 | goto free_newinfo_untrans; |
@@ -1494,11 +1499,11 @@ static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, | |||
1494 | 1499 | ||
1495 | switch (cmd) { | 1500 | switch (cmd) { |
1496 | case ARPT_SO_SET_REPLACE: | 1501 | case ARPT_SO_SET_REPLACE: |
1497 | ret = compat_do_replace(user, len); | 1502 | ret = compat_do_replace(sk->sk_net, user, len); |
1498 | break; | 1503 | break; |
1499 | 1504 | ||
1500 | case ARPT_SO_SET_ADD_COUNTERS: | 1505 | case ARPT_SO_SET_ADD_COUNTERS: |
1501 | ret = do_add_counters(user, len, 1); | 1506 | ret = do_add_counters(sk->sk_net, user, len, 1); |
1502 | break; | 1507 | break; |
1503 | 1508 | ||
1504 | default: | 1509 | default: |
@@ -1584,7 +1589,8 @@ struct compat_arpt_get_entries { | |||
1584 | struct compat_arpt_entry entrytable[0]; | 1589 | struct compat_arpt_entry entrytable[0]; |
1585 | }; | 1590 | }; |
1586 | 1591 | ||
1587 | static int compat_get_entries(struct compat_arpt_get_entries __user *uptr, | 1592 | static int compat_get_entries(struct net *net, |
1593 | struct compat_arpt_get_entries __user *uptr, | ||
1588 | int *len) | 1594 | int *len) |
1589 | { | 1595 | { |
1590 | int ret; | 1596 | int ret; |
@@ -1604,7 +1610,7 @@ static int compat_get_entries(struct compat_arpt_get_entries __user *uptr, | |||
1604 | } | 1610 | } |
1605 | 1611 | ||
1606 | xt_compat_lock(NF_ARP); | 1612 | xt_compat_lock(NF_ARP); |
1607 | t = xt_find_table_lock(NF_ARP, get.name); | 1613 | t = xt_find_table_lock(net, NF_ARP, get.name); |
1608 | if (t && !IS_ERR(t)) { | 1614 | if (t && !IS_ERR(t)) { |
1609 | struct xt_table_info *private = t->private; | 1615 | struct xt_table_info *private = t->private; |
1610 | struct xt_table_info info; | 1616 | struct xt_table_info info; |
@@ -1641,10 +1647,10 @@ static int compat_do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, | |||
1641 | 1647 | ||
1642 | switch (cmd) { | 1648 | switch (cmd) { |
1643 | case ARPT_SO_GET_INFO: | 1649 | case ARPT_SO_GET_INFO: |
1644 | ret = get_info(user, len, 1); | 1650 | ret = get_info(sk->sk_net, user, len, 1); |
1645 | break; | 1651 | break; |
1646 | case ARPT_SO_GET_ENTRIES: | 1652 | case ARPT_SO_GET_ENTRIES: |
1647 | ret = compat_get_entries(user, len); | 1653 | ret = compat_get_entries(sk->sk_net, user, len); |
1648 | break; | 1654 | break; |
1649 | default: | 1655 | default: |
1650 | ret = do_arpt_get_ctl(sk, cmd, user, len); | 1656 | ret = do_arpt_get_ctl(sk, cmd, user, len); |
@@ -1662,11 +1668,11 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned | |||
1662 | 1668 | ||
1663 | switch (cmd) { | 1669 | switch (cmd) { |
1664 | case ARPT_SO_SET_REPLACE: | 1670 | case ARPT_SO_SET_REPLACE: |
1665 | ret = do_replace(user, len); | 1671 | ret = do_replace(sk->sk_net, user, len); |
1666 | break; | 1672 | break; |
1667 | 1673 | ||
1668 | case ARPT_SO_SET_ADD_COUNTERS: | 1674 | case ARPT_SO_SET_ADD_COUNTERS: |
1669 | ret = do_add_counters(user, len, 0); | 1675 | ret = do_add_counters(sk->sk_net, user, len, 0); |
1670 | break; | 1676 | break; |
1671 | 1677 | ||
1672 | default: | 1678 | default: |
@@ -1686,11 +1692,11 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len | |||
1686 | 1692 | ||
1687 | switch (cmd) { | 1693 | switch (cmd) { |
1688 | case ARPT_SO_GET_INFO: | 1694 | case ARPT_SO_GET_INFO: |
1689 | ret = get_info(user, len, 0); | 1695 | ret = get_info(sk->sk_net, user, len, 0); |
1690 | break; | 1696 | break; |
1691 | 1697 | ||
1692 | case ARPT_SO_GET_ENTRIES: | 1698 | case ARPT_SO_GET_ENTRIES: |
1693 | ret = get_entries(user, len); | 1699 | ret = get_entries(sk->sk_net, user, len); |
1694 | break; | 1700 | break; |
1695 | 1701 | ||
1696 | case ARPT_SO_GET_REVISION_TARGET: { | 1702 | case ARPT_SO_GET_REVISION_TARGET: { |
@@ -1719,19 +1725,21 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len | |||
1719 | return ret; | 1725 | return ret; |
1720 | } | 1726 | } |
1721 | 1727 | ||
1722 | int arpt_register_table(struct arpt_table *table, | 1728 | struct arpt_table *arpt_register_table(struct net *net, |
1723 | const struct arpt_replace *repl) | 1729 | struct arpt_table *table, |
1730 | const struct arpt_replace *repl) | ||
1724 | { | 1731 | { |
1725 | int ret; | 1732 | int ret; |
1726 | struct xt_table_info *newinfo; | 1733 | struct xt_table_info *newinfo; |
1727 | struct xt_table_info bootstrap | 1734 | struct xt_table_info bootstrap |
1728 | = { 0, 0, 0, { 0 }, { 0 }, { } }; | 1735 | = { 0, 0, 0, { 0 }, { 0 }, { } }; |
1729 | void *loc_cpu_entry; | 1736 | void *loc_cpu_entry; |
1737 | struct xt_table *new_table; | ||
1730 | 1738 | ||
1731 | newinfo = xt_alloc_table_info(repl->size); | 1739 | newinfo = xt_alloc_table_info(repl->size); |
1732 | if (!newinfo) { | 1740 | if (!newinfo) { |
1733 | ret = -ENOMEM; | 1741 | ret = -ENOMEM; |
1734 | return ret; | 1742 | goto out; |
1735 | } | 1743 | } |
1736 | 1744 | ||
1737 | /* choose the copy on our node/cpu */ | 1745 | /* choose the copy on our node/cpu */ |
@@ -1745,24 +1753,27 @@ int arpt_register_table(struct arpt_table *table, | |||
1745 | repl->underflow); | 1753 | repl->underflow); |
1746 | 1754 | ||
1747 | duprintf("arpt_register_table: translate table gives %d\n", ret); | 1755 | duprintf("arpt_register_table: translate table gives %d\n", ret); |
1748 | if (ret != 0) { | 1756 | if (ret != 0) |
1749 | xt_free_table_info(newinfo); | 1757 | goto out_free; |
1750 | return ret; | ||
1751 | } | ||
1752 | 1758 | ||
1753 | ret = xt_register_table(table, &bootstrap, newinfo); | 1759 | new_table = xt_register_table(net, table, &bootstrap, newinfo); |
1754 | if (ret != 0) { | 1760 | if (IS_ERR(new_table)) { |
1755 | xt_free_table_info(newinfo); | 1761 | ret = PTR_ERR(new_table); |
1756 | return ret; | 1762 | goto out_free; |
1757 | } | 1763 | } |
1764 | return new_table; | ||
1758 | 1765 | ||
1759 | return 0; | 1766 | out_free: |
1767 | xt_free_table_info(newinfo); | ||
1768 | out: | ||
1769 | return ERR_PTR(ret); | ||
1760 | } | 1770 | } |
1761 | 1771 | ||
1762 | void arpt_unregister_table(struct arpt_table *table) | 1772 | void arpt_unregister_table(struct arpt_table *table) |
1763 | { | 1773 | { |
1764 | struct xt_table_info *private; | 1774 | struct xt_table_info *private; |
1765 | void *loc_cpu_entry; | 1775 | void *loc_cpu_entry; |
1776 | struct module *table_owner = table->me; | ||
1766 | 1777 | ||
1767 | private = xt_unregister_table(table); | 1778 | private = xt_unregister_table(table); |
1768 | 1779 | ||
@@ -1770,6 +1781,8 @@ void arpt_unregister_table(struct arpt_table *table) | |||
1770 | loc_cpu_entry = private->entries[raw_smp_processor_id()]; | 1781 | loc_cpu_entry = private->entries[raw_smp_processor_id()]; |
1771 | ARPT_ENTRY_ITERATE(loc_cpu_entry, private->size, | 1782 | ARPT_ENTRY_ITERATE(loc_cpu_entry, private->size, |
1772 | cleanup_entry, NULL); | 1783 | cleanup_entry, NULL); |
1784 | if (private->number > private->initial_entries) | ||
1785 | module_put(table_owner); | ||
1773 | xt_free_table_info(private); | 1786 | xt_free_table_info(private); |
1774 | } | 1787 | } |
1775 | 1788 | ||
@@ -1809,11 +1822,26 @@ static struct nf_sockopt_ops arpt_sockopts = { | |||
1809 | .owner = THIS_MODULE, | 1822 | .owner = THIS_MODULE, |
1810 | }; | 1823 | }; |
1811 | 1824 | ||
1825 | static int __net_init arp_tables_net_init(struct net *net) | ||
1826 | { | ||
1827 | return xt_proto_init(net, NF_ARP); | ||
1828 | } | ||
1829 | |||
1830 | static void __net_exit arp_tables_net_exit(struct net *net) | ||
1831 | { | ||
1832 | xt_proto_fini(net, NF_ARP); | ||
1833 | } | ||
1834 | |||
1835 | static struct pernet_operations arp_tables_net_ops = { | ||
1836 | .init = arp_tables_net_init, | ||
1837 | .exit = arp_tables_net_exit, | ||
1838 | }; | ||
1839 | |||
1812 | static int __init arp_tables_init(void) | 1840 | static int __init arp_tables_init(void) |
1813 | { | 1841 | { |
1814 | int ret; | 1842 | int ret; |
1815 | 1843 | ||
1816 | ret = xt_proto_init(NF_ARP); | 1844 | ret = register_pernet_subsys(&arp_tables_net_ops); |
1817 | if (ret < 0) | 1845 | if (ret < 0) |
1818 | goto err1; | 1846 | goto err1; |
1819 | 1847 | ||
@@ -1838,7 +1866,7 @@ err4: | |||
1838 | err3: | 1866 | err3: |
1839 | xt_unregister_target(&arpt_standard_target); | 1867 | xt_unregister_target(&arpt_standard_target); |
1840 | err2: | 1868 | err2: |
1841 | xt_proto_fini(NF_ARP); | 1869 | unregister_pernet_subsys(&arp_tables_net_ops); |
1842 | err1: | 1870 | err1: |
1843 | return ret; | 1871 | return ret; |
1844 | } | 1872 | } |
@@ -1848,7 +1876,7 @@ static void __exit arp_tables_fini(void) | |||
1848 | nf_unregister_sockopt(&arpt_sockopts); | 1876 | nf_unregister_sockopt(&arpt_sockopts); |
1849 | xt_unregister_target(&arpt_error_target); | 1877 | xt_unregister_target(&arpt_error_target); |
1850 | xt_unregister_target(&arpt_standard_target); | 1878 | xt_unregister_target(&arpt_standard_target); |
1851 | xt_proto_fini(NF_ARP); | 1879 | unregister_pernet_subsys(&arp_tables_net_ops); |
1852 | } | 1880 | } |
1853 | 1881 | ||
1854 | EXPORT_SYMBOL(arpt_register_table); | 1882 | EXPORT_SYMBOL(arpt_register_table); |
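The arp_tables conversion follows the standard pernet_operations recipe, used twice in this patch (arp_tables here, arptable_filter below): global init/fini work moves into per-namespace callbacks registered once at module load. Its skeleton, with placeholder names:

static int __net_init foo_net_init(struct net *net)
{
        return 0;       /* allocate or register this netns' state */
}

static void __net_exit foo_net_exit(struct net *net)
{
        /* tear down this netns' state */
}

static struct pernet_operations foo_net_ops = {
        .init = foo_net_init,
        .exit = foo_net_exit,
};

/* module init:  register_pernet_subsys(&foo_net_ops);
 * module exit:  unregister_pernet_subsys(&foo_net_ops);  */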
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 7201511d54d2..4e9c496a30c2 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c | |||
@@ -20,7 +20,7 @@ static struct | |||
20 | struct arpt_replace repl; | 20 | struct arpt_replace repl; |
21 | struct arpt_standard entries[3]; | 21 | struct arpt_standard entries[3]; |
22 | struct arpt_error term; | 22 | struct arpt_error term; |
23 | } initial_table __initdata = { | 23 | } initial_table __net_initdata = { |
24 | .repl = { | 24 | .repl = { |
25 | .name = "filter", | 25 | .name = "filter", |
26 | .valid_hooks = FILTER_VALID_HOOKS, | 26 | .valid_hooks = FILTER_VALID_HOOKS, |
@@ -61,7 +61,7 @@ static unsigned int arpt_hook(unsigned int hook, | |||
61 | const struct net_device *out, | 61 | const struct net_device *out, |
62 | int (*okfn)(struct sk_buff *)) | 62 | int (*okfn)(struct sk_buff *)) |
63 | { | 63 | { |
64 | return arpt_do_table(skb, hook, in, out, &packet_filter); | 64 | return arpt_do_table(skb, hook, in, out, init_net.ipv4.arptable_filter); |
65 | } | 65 | } |
66 | 66 | ||
67 | static struct nf_hook_ops arpt_ops[] __read_mostly = { | 67 | static struct nf_hook_ops arpt_ops[] __read_mostly = { |
@@ -85,12 +85,31 @@ static struct nf_hook_ops arpt_ops[] __read_mostly = { | |||
85 | }, | 85 | }, |
86 | }; | 86 | }; |
87 | 87 | ||
88 | static int __net_init arptable_filter_net_init(struct net *net) | ||
89 | { | ||
90 | /* Register table */ | ||
91 | net->ipv4.arptable_filter = | ||
92 | arpt_register_table(net, &packet_filter, &initial_table.repl); | ||
93 | if (IS_ERR(net->ipv4.arptable_filter)) | ||
94 | return PTR_ERR(net->ipv4.arptable_filter); | ||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | static void __net_exit arptable_filter_net_exit(struct net *net) | ||
99 | { | ||
100 | arpt_unregister_table(net->ipv4.arptable_filter); | ||
101 | } | ||
102 | |||
103 | static struct pernet_operations arptable_filter_net_ops = { | ||
104 | .init = arptable_filter_net_init, | ||
105 | .exit = arptable_filter_net_exit, | ||
106 | }; | ||
107 | |||
88 | static int __init arptable_filter_init(void) | 108 | static int __init arptable_filter_init(void) |
89 | { | 109 | { |
90 | int ret; | 110 | int ret; |
91 | 111 | ||
92 | /* Register table */ | 112 | ret = register_pernet_subsys(&arptable_filter_net_ops); |
93 | ret = arpt_register_table(&packet_filter, &initial_table.repl); | ||
94 | if (ret < 0) | 113 | if (ret < 0) |
95 | return ret; | 114 | return ret; |
96 | 115 | ||
@@ -100,14 +119,14 @@ static int __init arptable_filter_init(void) | |||
100 | return ret; | 119 | return ret; |
101 | 120 | ||
102 | cleanup_table: | 121 | cleanup_table: |
103 | arpt_unregister_table(&packet_filter); | 122 | unregister_pernet_subsys(&arptable_filter_net_ops); |
104 | return ret; | 123 | return ret; |
105 | } | 124 | } |
106 | 125 | ||
107 | static void __exit arptable_filter_fini(void) | 126 | static void __exit arptable_filter_fini(void) |
108 | { | 127 | { |
109 | nf_unregister_hooks(arpt_ops, ARRAY_SIZE(arpt_ops)); | 128 | nf_unregister_hooks(arpt_ops, ARRAY_SIZE(arpt_ops)); |
110 | arpt_unregister_table(&packet_filter); | 129 | unregister_pernet_subsys(&arptable_filter_net_ops); |
111 | } | 130 | } |
112 | 131 | ||
113 | module_init(arptable_filter_init); | 132 | module_init(arptable_filter_init); |
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 5109839da222..6bda1102851b 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c | |||
@@ -512,6 +512,7 @@ static struct notifier_block ipq_nl_notifier = { | |||
512 | .notifier_call = ipq_rcv_nl_event, | 512 | .notifier_call = ipq_rcv_nl_event, |
513 | }; | 513 | }; |
514 | 514 | ||
515 | #ifdef CONFIG_SYSCTL | ||
515 | static struct ctl_table_header *ipq_sysctl_header; | 516 | static struct ctl_table_header *ipq_sysctl_header; |
516 | 517 | ||
517 | static ctl_table ipq_table[] = { | 518 | static ctl_table ipq_table[] = { |
@@ -525,7 +526,9 @@ static ctl_table ipq_table[] = { | |||
525 | }, | 526 | }, |
526 | { .ctl_name = 0 } | 527 | { .ctl_name = 0 } |
527 | }; | 528 | }; |
529 | #endif | ||
528 | 530 | ||
531 | #ifdef CONFIG_PROC_FS | ||
529 | static int ip_queue_show(struct seq_file *m, void *v) | 532 | static int ip_queue_show(struct seq_file *m, void *v) |
530 | { | 533 | { |
531 | read_lock_bh(&queue_lock); | 534 | read_lock_bh(&queue_lock); |
@@ -562,6 +565,7 @@ static const struct file_operations ip_queue_proc_fops = { | |||
562 | .release = single_release, | 565 | .release = single_release, |
563 | .owner = THIS_MODULE, | 566 | .owner = THIS_MODULE, |
564 | }; | 567 | }; |
568 | #endif | ||
565 | 569 | ||
566 | static const struct nf_queue_handler nfqh = { | 570 | static const struct nf_queue_handler nfqh = { |
567 | .name = "ip_queue", | 571 | .name = "ip_queue", |
@@ -571,7 +575,7 @@ static const struct nf_queue_handler nfqh = { | |||
571 | static int __init ip_queue_init(void) | 575 | static int __init ip_queue_init(void) |
572 | { | 576 | { |
573 | int status = -ENOMEM; | 577 | int status = -ENOMEM; |
574 | struct proc_dir_entry *proc; | 578 | struct proc_dir_entry *proc __maybe_unused; |
575 | 579 | ||
576 | netlink_register_notifier(&ipq_nl_notifier); | 580 | netlink_register_notifier(&ipq_nl_notifier); |
577 | ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0, | 581 | ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0, |
@@ -581,6 +585,7 @@ static int __init ip_queue_init(void) | |||
581 | goto cleanup_netlink_notifier; | 585 | goto cleanup_netlink_notifier; |
582 | } | 586 | } |
583 | 587 | ||
588 | #ifdef CONFIG_PROC_FS | ||
584 | proc = create_proc_entry(IPQ_PROC_FS_NAME, 0, init_net.proc_net); | 589 | proc = create_proc_entry(IPQ_PROC_FS_NAME, 0, init_net.proc_net); |
585 | if (proc) { | 590 | if (proc) { |
586 | proc->owner = THIS_MODULE; | 591 | proc->owner = THIS_MODULE; |
@@ -589,10 +594,11 @@ static int __init ip_queue_init(void) | |||
589 | printk(KERN_ERR "ip_queue: failed to create proc entry\n"); | 594 | printk(KERN_ERR "ip_queue: failed to create proc entry\n"); |
590 | goto cleanup_ipqnl; | 595 | goto cleanup_ipqnl; |
591 | } | 596 | } |
592 | 597 | #endif | |
593 | register_netdevice_notifier(&ipq_dev_notifier); | 598 | register_netdevice_notifier(&ipq_dev_notifier); |
599 | #ifdef CONFIG_SYSCTL | ||
594 | ipq_sysctl_header = register_sysctl_paths(net_ipv4_ctl_path, ipq_table); | 600 | ipq_sysctl_header = register_sysctl_paths(net_ipv4_ctl_path, ipq_table); |
595 | 601 | #endif | |
596 | status = nf_register_queue_handler(PF_INET, &nfqh); | 602 | status = nf_register_queue_handler(PF_INET, &nfqh); |
597 | if (status < 0) { | 603 | if (status < 0) { |
598 | printk(KERN_ERR "ip_queue: failed to register queue handler\n"); | 604 | printk(KERN_ERR "ip_queue: failed to register queue handler\n"); |
@@ -601,10 +607,12 @@ static int __init ip_queue_init(void) | |||
601 | return status; | 607 | return status; |
602 | 608 | ||
603 | cleanup_sysctl: | 609 | cleanup_sysctl: |
610 | #ifdef CONFIG_SYSCTL | ||
604 | unregister_sysctl_table(ipq_sysctl_header); | 611 | unregister_sysctl_table(ipq_sysctl_header); |
612 | #endif | ||
605 | unregister_netdevice_notifier(&ipq_dev_notifier); | 613 | unregister_netdevice_notifier(&ipq_dev_notifier); |
606 | proc_net_remove(&init_net, IPQ_PROC_FS_NAME); | 614 | proc_net_remove(&init_net, IPQ_PROC_FS_NAME); |
607 | cleanup_ipqnl: | 615 | cleanup_ipqnl: __maybe_unused |
608 | netlink_kernel_release(ipqnl); | 616 | netlink_kernel_release(ipqnl); |
609 | mutex_lock(&ipqnl_mutex); | 617 | mutex_lock(&ipqnl_mutex); |
610 | mutex_unlock(&ipqnl_mutex); | 618 | mutex_unlock(&ipqnl_mutex); |
@@ -620,7 +628,9 @@ static void __exit ip_queue_fini(void) | |||
620 | synchronize_net(); | 628 | synchronize_net(); |
621 | ipq_flush(NULL, 0); | 629 | ipq_flush(NULL, 0); |
622 | 630 | ||
631 | #ifdef CONFIG_SYSCTL | ||
623 | unregister_sysctl_table(ipq_sysctl_header); | 632 | unregister_sysctl_table(ipq_sysctl_header); |
633 | #endif | ||
624 | unregister_netdevice_notifier(&ipq_dev_notifier); | 634 | unregister_netdevice_notifier(&ipq_dev_notifier); |
625 | proc_net_remove(&init_net, IPQ_PROC_FS_NAME); | 635 | proc_net_remove(&init_net, IPQ_PROC_FS_NAME); |
626 | 636 | ||
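The ip_queue hunks wrap the sysctl and procfs pieces in #ifdef so the module still links when either subsystem is configured out, with __maybe_unused quieting the then-unused proc variable and label. Reduced to a skeleton (placeholder names; foo_table stands in for a ctl_table array like ipq_table):

#ifdef CONFIG_SYSCTL
static struct ctl_table_header *foo_sysctl_header;
#endif

static int __init foo_init(void)
{
        struct proc_dir_entry *proc __maybe_unused;

#ifdef CONFIG_PROC_FS
        proc = create_proc_entry("foo", 0, init_net.proc_net);
        if (!proc)
                return -ENOMEM;
#endif
#ifdef CONFIG_SYSCTL
        foo_sysctl_header = register_sysctl_paths(net_ipv4_ctl_path,
                                                  foo_table);
#endif
        return 0;
}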
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 982b7f986291..600737f122d2 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c | |||
@@ -291,7 +291,7 @@ static void trace_packet(struct sk_buff *skb, | |||
291 | unsigned int hook, | 291 | unsigned int hook, |
292 | const struct net_device *in, | 292 | const struct net_device *in, |
293 | const struct net_device *out, | 293 | const struct net_device *out, |
294 | char *tablename, | 294 | const char *tablename, |
295 | struct xt_table_info *private, | 295 | struct xt_table_info *private, |
296 | struct ipt_entry *e) | 296 | struct ipt_entry *e) |
297 | { | 297 | { |
@@ -1092,7 +1092,7 @@ static int compat_table_info(const struct xt_table_info *info, | |||
1092 | } | 1092 | } |
1093 | #endif | 1093 | #endif |
1094 | 1094 | ||
1095 | static int get_info(void __user *user, int *len, int compat) | 1095 | static int get_info(struct net *net, void __user *user, int *len, int compat) |
1096 | { | 1096 | { |
1097 | char name[IPT_TABLE_MAXNAMELEN]; | 1097 | char name[IPT_TABLE_MAXNAMELEN]; |
1098 | struct xt_table *t; | 1098 | struct xt_table *t; |
@@ -1112,7 +1112,7 @@ static int get_info(void __user *user, int *len, int compat) | |||
1112 | if (compat) | 1112 | if (compat) |
1113 | xt_compat_lock(AF_INET); | 1113 | xt_compat_lock(AF_INET); |
1114 | #endif | 1114 | #endif |
1115 | t = try_then_request_module(xt_find_table_lock(AF_INET, name), | 1115 | t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), |
1116 | "iptable_%s", name); | 1116 | "iptable_%s", name); |
1117 | if (t && !IS_ERR(t)) { | 1117 | if (t && !IS_ERR(t)) { |
1118 | struct ipt_getinfo info; | 1118 | struct ipt_getinfo info; |
@@ -1152,7 +1152,7 @@ static int get_info(void __user *user, int *len, int compat) | |||
1152 | } | 1152 | } |
1153 | 1153 | ||
1154 | static int | 1154 | static int |
1155 | get_entries(struct ipt_get_entries __user *uptr, int *len) | 1155 | get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len) |
1156 | { | 1156 | { |
1157 | int ret; | 1157 | int ret; |
1158 | struct ipt_get_entries get; | 1158 | struct ipt_get_entries get; |
@@ -1170,7 +1170,7 @@ get_entries(struct ipt_get_entries __user *uptr, int *len) | |||
1170 | return -EINVAL; | 1170 | return -EINVAL; |
1171 | } | 1171 | } |
1172 | 1172 | ||
1173 | t = xt_find_table_lock(AF_INET, get.name); | 1173 | t = xt_find_table_lock(net, AF_INET, get.name); |
1174 | if (t && !IS_ERR(t)) { | 1174 | if (t && !IS_ERR(t)) { |
1175 | struct xt_table_info *private = t->private; | 1175 | struct xt_table_info *private = t->private; |
1176 | duprintf("t->private->number = %u\n", private->number); | 1176 | duprintf("t->private->number = %u\n", private->number); |
@@ -1191,7 +1191,7 @@ get_entries(struct ipt_get_entries __user *uptr, int *len) | |||
1191 | } | 1191 | } |
1192 | 1192 | ||
1193 | static int | 1193 | static int |
1194 | __do_replace(const char *name, unsigned int valid_hooks, | 1194 | __do_replace(struct net *net, const char *name, unsigned int valid_hooks, |
1195 | struct xt_table_info *newinfo, unsigned int num_counters, | 1195 | struct xt_table_info *newinfo, unsigned int num_counters, |
1196 | void __user *counters_ptr) | 1196 | void __user *counters_ptr) |
1197 | { | 1197 | { |
@@ -1208,7 +1208,7 @@ __do_replace(const char *name, unsigned int valid_hooks, | |||
1208 | goto out; | 1208 | goto out; |
1209 | } | 1209 | } |
1210 | 1210 | ||
1211 | t = try_then_request_module(xt_find_table_lock(AF_INET, name), | 1211 | t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), |
1212 | "iptable_%s", name); | 1212 | "iptable_%s", name); |
1213 | if (!t || IS_ERR(t)) { | 1213 | if (!t || IS_ERR(t)) { |
1214 | ret = t ? PTR_ERR(t) : -ENOENT; | 1214 | ret = t ? PTR_ERR(t) : -ENOENT; |
@@ -1261,7 +1261,7 @@ __do_replace(const char *name, unsigned int valid_hooks, | |||
1261 | } | 1261 | } |
1262 | 1262 | ||
1263 | static int | 1263 | static int |
1264 | do_replace(void __user *user, unsigned int len) | 1264 | do_replace(struct net *net, void __user *user, unsigned int len) |
1265 | { | 1265 | { |
1266 | int ret; | 1266 | int ret; |
1267 | struct ipt_replace tmp; | 1267 | struct ipt_replace tmp; |
@@ -1295,7 +1295,7 @@ do_replace(void __user *user, unsigned int len) | |||
1295 | 1295 | ||
1296 | duprintf("ip_tables: Translated table\n"); | 1296 | duprintf("ip_tables: Translated table\n"); |
1297 | 1297 | ||
1298 | ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo, | 1298 | ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, |
1299 | tmp.num_counters, tmp.counters); | 1299 | tmp.num_counters, tmp.counters); |
1300 | if (ret) | 1300 | if (ret) |
1301 | goto free_newinfo_untrans; | 1301 | goto free_newinfo_untrans; |
@@ -1331,7 +1331,7 @@ add_counter_to_entry(struct ipt_entry *e, | |||
1331 | } | 1331 | } |
1332 | 1332 | ||
1333 | static int | 1333 | static int |
1334 | do_add_counters(void __user *user, unsigned int len, int compat) | 1334 | do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) |
1335 | { | 1335 | { |
1336 | unsigned int i; | 1336 | unsigned int i; |
1337 | struct xt_counters_info tmp; | 1337 | struct xt_counters_info tmp; |
@@ -1383,7 +1383,7 @@ do_add_counters(void __user *user, unsigned int len, int compat) | |||
1383 | goto free; | 1383 | goto free; |
1384 | } | 1384 | } |
1385 | 1385 | ||
1386 | t = xt_find_table_lock(AF_INET, name); | 1386 | t = xt_find_table_lock(net, AF_INET, name); |
1387 | if (!t || IS_ERR(t)) { | 1387 | if (!t || IS_ERR(t)) { |
1388 | ret = t ? PTR_ERR(t) : -ENOENT; | 1388 | ret = t ? PTR_ERR(t) : -ENOENT; |
1389 | goto free; | 1389 | goto free; |
@@ -1429,7 +1429,7 @@ struct compat_ipt_replace { | |||
1429 | 1429 | ||
1430 | static int | 1430 | static int |
1431 | compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, | 1431 | compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, |
1432 | compat_uint_t *size, struct xt_counters *counters, | 1432 | unsigned int *size, struct xt_counters *counters, |
1433 | unsigned int *i) | 1433 | unsigned int *i) |
1434 | { | 1434 | { |
1435 | struct ipt_entry_target *t; | 1435 | struct ipt_entry_target *t; |
@@ -1476,7 +1476,7 @@ compat_find_calc_match(struct ipt_entry_match *m, | |||
1476 | const char *name, | 1476 | const char *name, |
1477 | const struct ipt_ip *ip, | 1477 | const struct ipt_ip *ip, |
1478 | unsigned int hookmask, | 1478 | unsigned int hookmask, |
1479 | int *size, int *i) | 1479 | int *size, unsigned int *i) |
1480 | { | 1480 | { |
1481 | struct xt_match *match; | 1481 | struct xt_match *match; |
1482 | 1482 | ||
@@ -1534,7 +1534,8 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, | |||
1534 | struct ipt_entry_target *t; | 1534 | struct ipt_entry_target *t; |
1535 | struct xt_target *target; | 1535 | struct xt_target *target; |
1536 | unsigned int entry_offset; | 1536 | unsigned int entry_offset; |
1537 | int ret, off, h, j; | 1537 | unsigned int j; |
1538 | int ret, off, h; | ||
1538 | 1539 | ||
1539 | duprintf("check_compat_entry_size_and_hooks %p\n", e); | 1540 | duprintf("check_compat_entry_size_and_hooks %p\n", e); |
1540 | if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 | 1541 | if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 |
@@ -1647,7 +1648,8 @@ static int | |||
1647 | compat_check_entry(struct ipt_entry *e, const char *name, | 1648 | compat_check_entry(struct ipt_entry *e, const char *name, |
1648 | unsigned int *i) | 1649 | unsigned int *i) |
1649 | { | 1650 | { |
1650 | int j, ret; | 1651 | unsigned int j; |
1652 | int ret; | ||
1651 | 1653 | ||
1652 | j = 0; | 1654 | j = 0; |
1653 | ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, | 1655 | ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, |
@@ -1789,7 +1791,7 @@ out_unlock: | |||
1789 | } | 1791 | } |
1790 | 1792 | ||
1791 | static int | 1793 | static int |
1792 | compat_do_replace(void __user *user, unsigned int len) | 1794 | compat_do_replace(struct net *net, void __user *user, unsigned int len) |
1793 | { | 1795 | { |
1794 | int ret; | 1796 | int ret; |
1795 | struct compat_ipt_replace tmp; | 1797 | struct compat_ipt_replace tmp; |
@@ -1826,7 +1828,7 @@ compat_do_replace(void __user *user, unsigned int len) | |||
1826 | 1828 | ||
1827 | duprintf("compat_do_replace: Translated table\n"); | 1829 | duprintf("compat_do_replace: Translated table\n"); |
1828 | 1830 | ||
1829 | ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo, | 1831 | ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, |
1830 | tmp.num_counters, compat_ptr(tmp.counters)); | 1832 | tmp.num_counters, compat_ptr(tmp.counters)); |
1831 | if (ret) | 1833 | if (ret) |
1832 | goto free_newinfo_untrans; | 1834 | goto free_newinfo_untrans; |
@@ -1850,11 +1852,11 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, | |||
1850 | 1852 | ||
1851 | switch (cmd) { | 1853 | switch (cmd) { |
1852 | case IPT_SO_SET_REPLACE: | 1854 | case IPT_SO_SET_REPLACE: |
1853 | ret = compat_do_replace(user, len); | 1855 | ret = compat_do_replace(sk->sk_net, user, len); |
1854 | break; | 1856 | break; |
1855 | 1857 | ||
1856 | case IPT_SO_SET_ADD_COUNTERS: | 1858 | case IPT_SO_SET_ADD_COUNTERS: |
1857 | ret = do_add_counters(user, len, 1); | 1859 | ret = do_add_counters(sk->sk_net, user, len, 1); |
1858 | break; | 1860 | break; |
1859 | 1861 | ||
1860 | default: | 1862 | default: |
@@ -1903,7 +1905,8 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, | |||
1903 | } | 1905 | } |
1904 | 1906 | ||
1905 | static int | 1907 | static int |
1906 | compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len) | 1908 | compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, |
1909 | int *len) | ||
1907 | { | 1910 | { |
1908 | int ret; | 1911 | int ret; |
1909 | struct compat_ipt_get_entries get; | 1912 | struct compat_ipt_get_entries get; |
@@ -1924,7 +1927,7 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len) | |||
1924 | } | 1927 | } |
1925 | 1928 | ||
1926 | xt_compat_lock(AF_INET); | 1929 | xt_compat_lock(AF_INET); |
1927 | t = xt_find_table_lock(AF_INET, get.name); | 1930 | t = xt_find_table_lock(net, AF_INET, get.name); |
1928 | if (t && !IS_ERR(t)) { | 1931 | if (t && !IS_ERR(t)) { |
1929 | struct xt_table_info *private = t->private; | 1932 | struct xt_table_info *private = t->private; |
1930 | struct xt_table_info info; | 1933 | struct xt_table_info info; |
@@ -1960,10 +1963,10 @@ compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) | |||
1960 | 1963 | ||
1961 | switch (cmd) { | 1964 | switch (cmd) { |
1962 | case IPT_SO_GET_INFO: | 1965 | case IPT_SO_GET_INFO: |
1963 | ret = get_info(user, len, 1); | 1966 | ret = get_info(sk->sk_net, user, len, 1); |
1964 | break; | 1967 | break; |
1965 | case IPT_SO_GET_ENTRIES: | 1968 | case IPT_SO_GET_ENTRIES: |
1966 | ret = compat_get_entries(user, len); | 1969 | ret = compat_get_entries(sk->sk_net, user, len); |
1967 | break; | 1970 | break; |
1968 | default: | 1971 | default: |
1969 | ret = do_ipt_get_ctl(sk, cmd, user, len); | 1972 | ret = do_ipt_get_ctl(sk, cmd, user, len); |
@@ -1982,11 +1985,11 @@ do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) | |||
1982 | 1985 | ||
1983 | switch (cmd) { | 1986 | switch (cmd) { |
1984 | case IPT_SO_SET_REPLACE: | 1987 | case IPT_SO_SET_REPLACE: |
1985 | ret = do_replace(user, len); | 1988 | ret = do_replace(sk->sk_net, user, len); |
1986 | break; | 1989 | break; |
1987 | 1990 | ||
1988 | case IPT_SO_SET_ADD_COUNTERS: | 1991 | case IPT_SO_SET_ADD_COUNTERS: |
1989 | ret = do_add_counters(user, len, 0); | 1992 | ret = do_add_counters(sk->sk_net, user, len, 0); |
1990 | break; | 1993 | break; |
1991 | 1994 | ||
1992 | default: | 1995 | default: |
@@ -2007,11 +2010,11 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) | |||
2007 | 2010 | ||
2008 | switch (cmd) { | 2011 | switch (cmd) { |
2009 | case IPT_SO_GET_INFO: | 2012 | case IPT_SO_GET_INFO: |
2010 | ret = get_info(user, len, 0); | 2013 | ret = get_info(sk->sk_net, user, len, 0); |
2011 | break; | 2014 | break; |
2012 | 2015 | ||
2013 | case IPT_SO_GET_ENTRIES: | 2016 | case IPT_SO_GET_ENTRIES: |
2014 | ret = get_entries(user, len); | 2017 | ret = get_entries(sk->sk_net, user, len); |
2015 | break; | 2018 | break; |
2016 | 2019 | ||
2017 | case IPT_SO_GET_REVISION_MATCH: | 2020 | case IPT_SO_GET_REVISION_MATCH: |
@@ -2048,17 +2051,21 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) | |||
2048 | return ret; | 2051 | return ret; |
2049 | } | 2052 | } |
2050 | 2053 | ||
2051 | int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl) | 2054 | struct xt_table *ipt_register_table(struct net *net, struct xt_table *table, |
2055 | const struct ipt_replace *repl) | ||
2052 | { | 2056 | { |
2053 | int ret; | 2057 | int ret; |
2054 | struct xt_table_info *newinfo; | 2058 | struct xt_table_info *newinfo; |
2055 | struct xt_table_info bootstrap | 2059 | struct xt_table_info bootstrap |
2056 | = { 0, 0, 0, { 0 }, { 0 }, { } }; | 2060 | = { 0, 0, 0, { 0 }, { 0 }, { } }; |
2057 | void *loc_cpu_entry; | 2061 | void *loc_cpu_entry; |
2062 | struct xt_table *new_table; | ||
2058 | 2063 | ||
2059 | newinfo = xt_alloc_table_info(repl->size); | 2064 | newinfo = xt_alloc_table_info(repl->size); |
2060 | if (!newinfo) | 2065 | if (!newinfo) { |
2061 | return -ENOMEM; | 2066 | ret = -ENOMEM; |
2067 | goto out; | ||
2068 | } | ||
2062 | 2069 | ||
2063 | /* choose the copy on our node/cpu, but dont care about preemption */ | 2070 | /* choose the copy on our node/cpu, but dont care about preemption */ |
2064 | loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; | 2071 | loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; |
@@ -2069,30 +2076,36 @@ int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl) | |||
2069 | repl->num_entries, | 2076 | repl->num_entries, |
2070 | repl->hook_entry, | 2077 | repl->hook_entry, |
2071 | repl->underflow); | 2078 | repl->underflow); |
2072 | if (ret != 0) { | 2079 | if (ret != 0) |
2073 | xt_free_table_info(newinfo); | 2080 | goto out_free; |
2074 | return ret; | ||
2075 | } | ||
2076 | 2081 | ||
2077 | ret = xt_register_table(table, &bootstrap, newinfo); | 2082 | new_table = xt_register_table(net, table, &bootstrap, newinfo); |
2078 | if (ret != 0) { | 2083 | if (IS_ERR(new_table)) { |
2079 | xt_free_table_info(newinfo); | 2084 | ret = PTR_ERR(new_table); |
2080 | return ret; | 2085 | goto out_free; |
2081 | } | 2086 | } |
2082 | 2087 | ||
2083 | return 0; | 2088 | return new_table; |
2089 | |||
2090 | out_free: | ||
2091 | xt_free_table_info(newinfo); | ||
2092 | out: | ||
2093 | return ERR_PTR(ret); | ||
2084 | } | 2094 | } |
2085 | 2095 | ||
2086 | void ipt_unregister_table(struct xt_table *table) | 2096 | void ipt_unregister_table(struct xt_table *table) |
2087 | { | 2097 | { |
2088 | struct xt_table_info *private; | 2098 | struct xt_table_info *private; |
2089 | void *loc_cpu_entry; | 2099 | void *loc_cpu_entry; |
2100 | struct module *table_owner = table->me; | ||
2090 | 2101 | ||
2091 | private = xt_unregister_table(table); | 2102 | private = xt_unregister_table(table); |
2092 | 2103 | ||
2093 | /* Decrease module usage counts and free resources */ | 2104 | /* Decrease module usage counts and free resources */ |
2094 | loc_cpu_entry = private->entries[raw_smp_processor_id()]; | 2105 | loc_cpu_entry = private->entries[raw_smp_processor_id()]; |
2095 | IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL); | 2106 | IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL); |
2107 | if (private->number > private->initial_entries) | ||
2108 | module_put(table_owner); | ||
2096 | xt_free_table_info(private); | 2109 | xt_free_table_info(private); |
2097 | } | 2110 | } |
2098 | 2111 | ||
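
The hunk above changes ipt_register_table()'s contract: instead of an int it now returns the live struct xt_table, or an errno encoded with ERR_PTR(), unwinding through goto labels on failure. A minimal sketch of that callee-side convention, with illustrative names throughout:

#include <linux/err.h>
#include <linux/slab.h>

struct example_obj { int cookie; };

static int example_validate(struct example_obj *obj)
{
	return 0;	/* hypothetical check; always passes in this sketch */
}

/* Return a live object on success, or an ERR_PTR()-encoded errno. */
static struct example_obj *example_register(void)
{
	struct example_obj *obj;
	int ret;

	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
	if (!obj) {
		ret = -ENOMEM;
		goto out;		/* nothing allocated yet */
	}
	ret = example_validate(obj);
	if (ret)
		goto out_free;
	return obj;			/* success path returns a real pointer */

out_free:
	kfree(obj);
out:
	return ERR_PTR(ret);		/* caller distinguishes via IS_ERR() */
}
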
@@ -2200,11 +2213,26 @@ static struct xt_match icmp_matchstruct __read_mostly = { | |||
2200 | .family = AF_INET, | 2213 | .family = AF_INET, |
2201 | }; | 2214 | }; |
2202 | 2215 | ||
2216 | static int __net_init ip_tables_net_init(struct net *net) | ||
2217 | { | ||
2218 | return xt_proto_init(net, AF_INET); | ||
2219 | } | ||
2220 | |||
2221 | static void __net_exit ip_tables_net_exit(struct net *net) | ||
2222 | { | ||
2223 | xt_proto_fini(net, AF_INET); | ||
2224 | } | ||
2225 | |||
2226 | static struct pernet_operations ip_tables_net_ops = { | ||
2227 | .init = ip_tables_net_init, | ||
2228 | .exit = ip_tables_net_exit, | ||
2229 | }; | ||
2230 | |||
2203 | static int __init ip_tables_init(void) | 2231 | static int __init ip_tables_init(void) |
2204 | { | 2232 | { |
2205 | int ret; | 2233 | int ret; |
2206 | 2234 | ||
2207 | ret = xt_proto_init(AF_INET); | 2235 | ret = register_pernet_subsys(&ip_tables_net_ops); |
2208 | if (ret < 0) | 2236 | if (ret < 0) |
2209 | goto err1; | 2237 | goto err1; |
2210 | 2238 | ||
@@ -2234,7 +2262,7 @@ err4: | |||
2234 | err3: | 2262 | err3: |
2235 | xt_unregister_target(&ipt_standard_target); | 2263 | xt_unregister_target(&ipt_standard_target); |
2236 | err2: | 2264 | err2: |
2237 | xt_proto_fini(AF_INET); | 2265 | unregister_pernet_subsys(&ip_tables_net_ops); |
2238 | err1: | 2266 | err1: |
2239 | return ret; | 2267 | return ret; |
2240 | } | 2268 | } |
@@ -2247,7 +2275,7 @@ static void __exit ip_tables_fini(void) | |||
2247 | xt_unregister_target(&ipt_error_target); | 2275 | xt_unregister_target(&ipt_error_target); |
2248 | xt_unregister_target(&ipt_standard_target); | 2276 | xt_unregister_target(&ipt_standard_target); |
2249 | 2277 | ||
2250 | xt_proto_fini(AF_INET); | 2278 | unregister_pernet_subsys(&ip_tables_net_ops); |
2251 | } | 2279 | } |
2252 | 2280 | ||
2253 | EXPORT_SYMBOL(ipt_register_table); | 2281 | EXPORT_SYMBOL(ipt_register_table); |
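
Taken together, the ip_tables changes follow the standard pernet_operations pattern: per-namespace setup in .init, teardown in .exit, and a single registration at module load. The .init callback runs for init_net and for every namespace created afterwards; .exit runs for each namespace as it dies and for the survivors at unload. A self-contained sketch of the pattern (all names illustrative):

#include <linux/module.h>
#include <net/net_namespace.h>

static int __net_init example_net_init(struct net *net)
{
	/* allocate/register this namespace's private state here */
	return 0;
}

static void __net_exit example_net_exit(struct net *net)
{
	/* undo everything example_net_init() did for this namespace */
}

static struct pernet_operations example_net_ops = {
	.init = example_net_init,
	.exit = example_net_exit,
};

static int __init example_init(void)
{
	return register_pernet_subsys(&example_net_ops);
}

static void __exit example_exit(void)
{
	unregister_pernet_subsys(&example_net_ops);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
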
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 1b31f7d14d46..c6cf84c77611 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c | |||
@@ -76,13 +76,6 @@ clusterip_config_put(struct clusterip_config *c) | |||
76 | kfree(c); | 76 | kfree(c); |
77 | } | 77 | } |
78 | 78 | ||
79 | /* increase the count of entries(rules) using/referencing this config */ | ||
80 | static inline void | ||
81 | clusterip_config_entry_get(struct clusterip_config *c) | ||
82 | { | ||
83 | atomic_inc(&c->entries); | ||
84 | } | ||
85 | |||
86 | /* decrease the count of entries using/referencing this config. If last | 79 | /* decrease the count of entries using/referencing this config. If last |
87 | * entry(rule) is removed, remove the config from lists, but don't free it | 80 | * entry(rule) is removed, remove the config from lists, but don't free it |
88 | * yet, since proc-files could still be holding references */ | 81 | * yet, since proc-files could still be holding references */ |
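
The CLUSTERIP hunk above deletes clusterip_config_entry_get() because nothing called it; it was the increment half of a per-rule reference count. For orientation, the usual shape of such a counter pair, as a generic sketch rather than the file's actual code:

#include <asm/atomic.h>

struct example_config { atomic_t entries; };

static void example_entry_get(struct example_config *c)
{
	atomic_inc(&c->entries);	/* one more rule references c */
}

static void example_entry_put(struct example_config *c)
{
	if (atomic_dec_and_test(&c->entries)) {
		/* last rule gone: unlink c, but defer freeing while
		 * /proc readers may still hold their own references */
	}
}
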
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index e3154a99c08a..68cbe3ca01ce 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c | |||
@@ -212,11 +212,11 @@ recent_mt(const struct sk_buff *skb, const struct net_device *in, | |||
212 | recent_entry_remove(t, e); | 212 | recent_entry_remove(t, e); |
213 | ret = !ret; | 213 | ret = !ret; |
214 | } else if (info->check_set & (IPT_RECENT_CHECK | IPT_RECENT_UPDATE)) { | 214 | } else if (info->check_set & (IPT_RECENT_CHECK | IPT_RECENT_UPDATE)) { |
215 | unsigned long t = jiffies - info->seconds * HZ; | 215 | unsigned long time = jiffies - info->seconds * HZ; |
216 | unsigned int i, hits = 0; | 216 | unsigned int i, hits = 0; |
217 | 217 | ||
218 | for (i = 0; i < e->nstamps; i++) { | 218 | for (i = 0; i < e->nstamps; i++) { |
219 | if (info->seconds && time_after(t, e->stamps[i])) | 219 | if (info->seconds && time_after(time, e->stamps[i])) |
220 | continue; | 220 | continue; |
221 | if (++hits >= info->hit_count) { | 221 | if (++hits >= info->hit_count) { |
222 | ret = !ret; | 222 | ret = !ret; |
@@ -320,6 +320,7 @@ struct recent_iter_state { | |||
320 | }; | 320 | }; |
321 | 321 | ||
322 | static void *recent_seq_start(struct seq_file *seq, loff_t *pos) | 322 | static void *recent_seq_start(struct seq_file *seq, loff_t *pos) |
323 | __acquires(recent_lock) | ||
323 | { | 324 | { |
324 | struct recent_iter_state *st = seq->private; | 325 | struct recent_iter_state *st = seq->private; |
325 | const struct recent_table *t = st->table; | 326 | const struct recent_table *t = st->table; |
@@ -352,6 +353,7 @@ static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
352 | } | 353 | } |
353 | 354 | ||
354 | static void recent_seq_stop(struct seq_file *s, void *v) | 355 | static void recent_seq_stop(struct seq_file *s, void *v) |
356 | __releases(recent_lock) | ||
355 | { | 357 | { |
356 | spin_unlock_bh(&recent_lock); | 358 | spin_unlock_bh(&recent_lock); |
357 | } | 359 | } |
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 29bb4f9fbda0..69f3d7e6e96f 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c | |||
@@ -28,7 +28,7 @@ static struct | |||
28 | struct ipt_replace repl; | 28 | struct ipt_replace repl; |
29 | struct ipt_standard entries[3]; | 29 | struct ipt_standard entries[3]; |
30 | struct ipt_error term; | 30 | struct ipt_error term; |
31 | } initial_table __initdata = { | 31 | } initial_table __net_initdata = { |
32 | .repl = { | 32 | .repl = { |
33 | .name = "filter", | 33 | .name = "filter", |
34 | .valid_hooks = FILTER_VALID_HOOKS, | 34 | .valid_hooks = FILTER_VALID_HOOKS, |
@@ -69,7 +69,7 @@ ipt_hook(unsigned int hook, | |||
69 | const struct net_device *out, | 69 | const struct net_device *out, |
70 | int (*okfn)(struct sk_buff *)) | 70 | int (*okfn)(struct sk_buff *)) |
71 | { | 71 | { |
72 | return ipt_do_table(skb, hook, in, out, &packet_filter); | 72 | return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_filter); |
73 | } | 73 | } |
74 | 74 | ||
75 | static unsigned int | 75 | static unsigned int |
@@ -88,7 +88,7 @@ ipt_local_out_hook(unsigned int hook, | |||
88 | return NF_ACCEPT; | 88 | return NF_ACCEPT; |
89 | } | 89 | } |
90 | 90 | ||
91 | return ipt_do_table(skb, hook, in, out, &packet_filter); | 91 | return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_filter); |
92 | } | 92 | } |
93 | 93 | ||
94 | static struct nf_hook_ops ipt_ops[] __read_mostly = { | 94 | static struct nf_hook_ops ipt_ops[] __read_mostly = { |
@@ -119,6 +119,26 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = { | |||
119 | static int forward = NF_ACCEPT; | 119 | static int forward = NF_ACCEPT; |
120 | module_param(forward, bool, 0000); | 120 | module_param(forward, bool, 0000); |
121 | 121 | ||
122 | static int __net_init iptable_filter_net_init(struct net *net) | ||
123 | { | ||
124 | /* Register table */ | ||
125 | net->ipv4.iptable_filter = | ||
126 | ipt_register_table(net, &packet_filter, &initial_table.repl); | ||
127 | if (IS_ERR(net->ipv4.iptable_filter)) | ||
128 | return PTR_ERR(net->ipv4.iptable_filter); | ||
129 | return 0; | ||
130 | } | ||
131 | |||
132 | static void __net_exit iptable_filter_net_exit(struct net *net) | ||
133 | { | ||
134 | ipt_unregister_table(net->ipv4.iptable_filter); | ||
135 | } | ||
136 | |||
137 | static struct pernet_operations iptable_filter_net_ops = { | ||
138 | .init = iptable_filter_net_init, | ||
139 | .exit = iptable_filter_net_exit, | ||
140 | }; | ||
141 | |||
122 | static int __init iptable_filter_init(void) | 142 | static int __init iptable_filter_init(void) |
123 | { | 143 | { |
124 | int ret; | 144 | int ret; |
@@ -131,8 +151,7 @@ static int __init iptable_filter_init(void) | |||
131 | /* Entry 1 is the FORWARD hook */ | 151 | /* Entry 1 is the FORWARD hook */ |
132 | initial_table.entries[1].target.verdict = -forward - 1; | 152 | initial_table.entries[1].target.verdict = -forward - 1; |
133 | 153 | ||
134 | /* Register table */ | 154 | ret = register_pernet_subsys(&iptable_filter_net_ops); |
135 | ret = ipt_register_table(&packet_filter, &initial_table.repl); | ||
136 | if (ret < 0) | 155 | if (ret < 0) |
137 | return ret; | 156 | return ret; |
138 | 157 | ||
@@ -144,14 +163,14 @@ static int __init iptable_filter_init(void) | |||
144 | return ret; | 163 | return ret; |
145 | 164 | ||
146 | cleanup_table: | 165 | cleanup_table: |
147 | ipt_unregister_table(&packet_filter); | 166 | unregister_pernet_subsys(&iptable_filter_net_ops); |
148 | return ret; | 167 | return ret; |
149 | } | 168 | } |
150 | 169 | ||
151 | static void __exit iptable_filter_fini(void) | 170 | static void __exit iptable_filter_fini(void) |
152 | { | 171 | { |
153 | nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); | 172 | nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); |
154 | ipt_unregister_table(&packet_filter); | 173 | unregister_pernet_subsys(&iptable_filter_net_ops); |
155 | } | 174 | } |
156 | 175 | ||
157 | module_init(iptable_filter_init); | 176 | module_init(iptable_filter_init); |
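
One caveat in the iptable_filter conversion: the table pointer now lives in the per-namespace struct (net->ipv4.iptable_filter), yet the hooks still hard-code init_net, so only the initial namespace is actually filtered at this point in the series. A sketch of what a fully namespace-aware hook would look like, assuming a dev_net()-style accessor that maps a device to its owning namespace (not something this patch introduces):

/* Hypothetical namespace-aware hook; the patch above still uses init_net. */
static unsigned int example_hook(unsigned int hook,
				 struct sk_buff *skb,
				 const struct net_device *in,
				 const struct net_device *out,
				 int (*okfn)(struct sk_buff *))
{
	/* dev_net() is assumed here: resolve the namespace from a device */
	struct net *net = in ? dev_net(in) : dev_net(out);

	return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_filter);
}
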
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 5c4be202430c..c55a210853a7 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c | |||
@@ -33,7 +33,7 @@ static struct | |||
33 | struct ipt_replace repl; | 33 | struct ipt_replace repl; |
34 | struct ipt_standard entries[5]; | 34 | struct ipt_standard entries[5]; |
35 | struct ipt_error term; | 35 | struct ipt_error term; |
36 | } initial_table __initdata = { | 36 | } initial_table __net_initdata = { |
37 | .repl = { | 37 | .repl = { |
38 | .name = "mangle", | 38 | .name = "mangle", |
39 | .valid_hooks = MANGLE_VALID_HOOKS, | 39 | .valid_hooks = MANGLE_VALID_HOOKS, |
@@ -80,7 +80,7 @@ ipt_route_hook(unsigned int hook, | |||
80 | const struct net_device *out, | 80 | const struct net_device *out, |
81 | int (*okfn)(struct sk_buff *)) | 81 | int (*okfn)(struct sk_buff *)) |
82 | { | 82 | { |
83 | return ipt_do_table(skb, hook, in, out, &packet_mangler); | 83 | return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_mangle); |
84 | } | 84 | } |
85 | 85 | ||
86 | static unsigned int | 86 | static unsigned int |
@@ -112,7 +112,7 @@ ipt_local_hook(unsigned int hook, | |||
112 | daddr = iph->daddr; | 112 | daddr = iph->daddr; |
113 | tos = iph->tos; | 113 | tos = iph->tos; |
114 | 114 | ||
115 | ret = ipt_do_table(skb, hook, in, out, &packet_mangler); | 115 | ret = ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_mangle); |
116 | /* Reroute for ANY change. */ | 116 | /* Reroute for ANY change. */ |
117 | if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { | 117 | if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { |
118 | iph = ip_hdr(skb); | 118 | iph = ip_hdr(skb); |
@@ -166,12 +166,31 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = { | |||
166 | }, | 166 | }, |
167 | }; | 167 | }; |
168 | 168 | ||
169 | static int __net_init iptable_mangle_net_init(struct net *net) | ||
170 | { | ||
171 | /* Register table */ | ||
172 | net->ipv4.iptable_mangle = | ||
173 | ipt_register_table(net, &packet_mangler, &initial_table.repl); | ||
174 | if (IS_ERR(net->ipv4.iptable_mangle)) | ||
175 | return PTR_ERR(net->ipv4.iptable_mangle); | ||
176 | return 0; | ||
177 | } | ||
178 | |||
179 | static void __net_exit iptable_mangle_net_exit(struct net *net) | ||
180 | { | ||
181 | ipt_unregister_table(net->ipv4.iptable_mangle); | ||
182 | } | ||
183 | |||
184 | static struct pernet_operations iptable_mangle_net_ops = { | ||
185 | .init = iptable_mangle_net_init, | ||
186 | .exit = iptable_mangle_net_exit, | ||
187 | }; | ||
188 | |||
169 | static int __init iptable_mangle_init(void) | 189 | static int __init iptable_mangle_init(void) |
170 | { | 190 | { |
171 | int ret; | 191 | int ret; |
172 | 192 | ||
173 | /* Register table */ | 193 | ret = register_pernet_subsys(&iptable_mangle_net_ops); |
174 | ret = ipt_register_table(&packet_mangler, &initial_table.repl); | ||
175 | if (ret < 0) | 194 | if (ret < 0) |
176 | return ret; | 195 | return ret; |
177 | 196 | ||
@@ -183,14 +202,14 @@ static int __init iptable_mangle_init(void) | |||
183 | return ret; | 202 | return ret; |
184 | 203 | ||
185 | cleanup_table: | 204 | cleanup_table: |
186 | ipt_unregister_table(&packet_mangler); | 205 | unregister_pernet_subsys(&iptable_mangle_net_ops); |
187 | return ret; | 206 | return ret; |
188 | } | 207 | } |
189 | 208 | ||
190 | static void __exit iptable_mangle_fini(void) | 209 | static void __exit iptable_mangle_fini(void) |
191 | { | 210 | { |
192 | nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); | 211 | nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); |
193 | ipt_unregister_table(&packet_mangler); | 212 | unregister_pernet_subsys(&iptable_mangle_net_ops); |
194 | } | 213 | } |
195 | 214 | ||
196 | module_init(iptable_mangle_init); | 215 | module_init(iptable_mangle_init); |
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index dc34aa274533..e41fe8ca4e1c 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c | |||
@@ -14,7 +14,7 @@ static struct | |||
14 | struct ipt_replace repl; | 14 | struct ipt_replace repl; |
15 | struct ipt_standard entries[2]; | 15 | struct ipt_standard entries[2]; |
16 | struct ipt_error term; | 16 | struct ipt_error term; |
17 | } initial_table __initdata = { | 17 | } initial_table __net_initdata = { |
18 | .repl = { | 18 | .repl = { |
19 | .name = "raw", | 19 | .name = "raw", |
20 | .valid_hooks = RAW_VALID_HOOKS, | 20 | .valid_hooks = RAW_VALID_HOOKS, |
@@ -52,7 +52,7 @@ ipt_hook(unsigned int hook, | |||
52 | const struct net_device *out, | 52 | const struct net_device *out, |
53 | int (*okfn)(struct sk_buff *)) | 53 | int (*okfn)(struct sk_buff *)) |
54 | { | 54 | { |
55 | return ipt_do_table(skb, hook, in, out, &packet_raw); | 55 | return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_raw); |
56 | } | 56 | } |
57 | 57 | ||
58 | static unsigned int | 58 | static unsigned int |
@@ -70,7 +70,7 @@ ipt_local_hook(unsigned int hook, | |||
70 | "packet.\n"); | 70 | "packet.\n"); |
71 | return NF_ACCEPT; | 71 | return NF_ACCEPT; |
72 | } | 72 | } |
73 | return ipt_do_table(skb, hook, in, out, &packet_raw); | 73 | return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_raw); |
74 | } | 74 | } |
75 | 75 | ||
76 | /* 'raw' is the very first table. */ | 76 | /* 'raw' is the very first table. */ |
@@ -91,12 +91,31 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = { | |||
91 | }, | 91 | }, |
92 | }; | 92 | }; |
93 | 93 | ||
94 | static int __net_init iptable_raw_net_init(struct net *net) | ||
95 | { | ||
96 | /* Register table */ | ||
97 | net->ipv4.iptable_raw = | ||
98 | ipt_register_table(net, &packet_raw, &initial_table.repl); | ||
99 | if (IS_ERR(net->ipv4.iptable_raw)) | ||
100 | return PTR_ERR(net->ipv4.iptable_raw); | ||
101 | return 0; | ||
102 | } | ||
103 | |||
104 | static void __net_exit iptable_raw_net_exit(struct net *net) | ||
105 | { | ||
106 | ipt_unregister_table(net->ipv4.iptable_raw); | ||
107 | } | ||
108 | |||
109 | static struct pernet_operations iptable_raw_net_ops = { | ||
110 | .init = iptable_raw_net_init, | ||
111 | .exit = iptable_raw_net_exit, | ||
112 | }; | ||
113 | |||
94 | static int __init iptable_raw_init(void) | 114 | static int __init iptable_raw_init(void) |
95 | { | 115 | { |
96 | int ret; | 116 | int ret; |
97 | 117 | ||
98 | /* Register table */ | 118 | ret = register_pernet_subsys(&iptable_raw_net_ops); |
99 | ret = ipt_register_table(&packet_raw, &initial_table.repl); | ||
100 | if (ret < 0) | 119 | if (ret < 0) |
101 | return ret; | 120 | return ret; |
102 | 121 | ||
@@ -108,14 +127,14 @@ static int __init iptable_raw_init(void) | |||
108 | return ret; | 127 | return ret; |
109 | 128 | ||
110 | cleanup_table: | 129 | cleanup_table: |
111 | ipt_unregister_table(&packet_raw); | 130 | unregister_pernet_subsys(&iptable_raw_net_ops); |
112 | return ret; | 131 | return ret; |
113 | } | 132 | } |
114 | 133 | ||
115 | static void __exit iptable_raw_fini(void) | 134 | static void __exit iptable_raw_fini(void) |
116 | { | 135 | { |
117 | nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); | 136 | nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); |
118 | ipt_unregister_table(&packet_raw); | 137 | unregister_pernet_subsys(&iptable_raw_net_ops); |
119 | } | 138 | } |
120 | 139 | ||
121 | module_init(iptable_raw_init); | 140 | module_init(iptable_raw_init); |
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index ac3d61d8026e..a65b845c5f15 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | |||
@@ -27,7 +27,8 @@ | |||
27 | static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, | 27 | static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, |
28 | struct nf_conntrack_tuple *tuple) | 28 | struct nf_conntrack_tuple *tuple) |
29 | { | 29 | { |
30 | __be32 _addrs[2], *ap; | 30 | const __be32 *ap; |
31 | __be32 _addrs[2]; | ||
31 | ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr), | 32 | ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr), |
32 | sizeof(u_int32_t) * 2, _addrs); | 33 | sizeof(u_int32_t) * 2, _addrs); |
33 | if (ap == NULL) | 34 | if (ap == NULL) |
@@ -76,7 +77,8 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) | |||
76 | static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, | 77 | static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, |
77 | unsigned int *dataoff, u_int8_t *protonum) | 78 | unsigned int *dataoff, u_int8_t *protonum) |
78 | { | 79 | { |
79 | struct iphdr _iph, *iph; | 80 | const struct iphdr *iph; |
81 | struct iphdr _iph; | ||
80 | 82 | ||
81 | iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); | 83 | iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); |
82 | if (iph == NULL) | 84 | if (iph == NULL) |
@@ -111,8 +113,8 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum, | |||
111 | { | 113 | { |
112 | struct nf_conn *ct; | 114 | struct nf_conn *ct; |
113 | enum ip_conntrack_info ctinfo; | 115 | enum ip_conntrack_info ctinfo; |
114 | struct nf_conn_help *help; | 116 | const struct nf_conn_help *help; |
115 | struct nf_conntrack_helper *helper; | 117 | const struct nf_conntrack_helper *helper; |
116 | 118 | ||
117 | /* This is where we call the helper: as the packet goes out. */ | 119 | /* This is where we call the helper: as the packet goes out. */ |
118 | ct = nf_ct_get(skb, &ctinfo); | 120 | ct = nf_ct_get(skb, &ctinfo); |
@@ -299,8 +301,8 @@ static ctl_table ip_ct_sysctl_table[] = { | |||
299 | static int | 301 | static int |
300 | getorigdst(struct sock *sk, int optval, void __user *user, int *len) | 302 | getorigdst(struct sock *sk, int optval, void __user *user, int *len) |
301 | { | 303 | { |
302 | struct inet_sock *inet = inet_sk(sk); | 304 | const struct inet_sock *inet = inet_sk(sk); |
303 | struct nf_conntrack_tuple_hash *h; | 305 | const struct nf_conntrack_tuple_hash *h; |
304 | struct nf_conntrack_tuple tuple; | 306 | struct nf_conntrack_tuple tuple; |
305 | 307 | ||
306 | NF_CT_TUPLE_U_BLANK(&tuple); | 308 | NF_CT_TUPLE_U_BLANK(&tuple); |
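
The constification hunks above all follow the same skb_header_pointer() idiom: pass a small on-stack buffer, get back either a pointer into the skb's linear data or the filled-in buffer, and treat the result as read-only either way. A minimal sketch:

#include <linux/ip.h>
#include <linux/skbuff.h>

/* Read the IP header without assuming it is linear in the skb. */
static int example_peek_iphdr(const struct sk_buff *skb, unsigned int nhoff)
{
	struct iphdr _iph;		/* backing storage if data is paged */
	const struct iphdr *iph;

	iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
	if (iph == NULL)
		return -1;		/* packet too short */
	return iph->protocol;		/* read-only access is all we need */
}
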
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 543c02b74c96..089252e82c01 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | |||
@@ -39,12 +39,14 @@ struct ct_iter_state { | |||
39 | static struct hlist_node *ct_get_first(struct seq_file *seq) | 39 | static struct hlist_node *ct_get_first(struct seq_file *seq) |
40 | { | 40 | { |
41 | struct ct_iter_state *st = seq->private; | 41 | struct ct_iter_state *st = seq->private; |
42 | struct hlist_node *n; | ||
42 | 43 | ||
43 | for (st->bucket = 0; | 44 | for (st->bucket = 0; |
44 | st->bucket < nf_conntrack_htable_size; | 45 | st->bucket < nf_conntrack_htable_size; |
45 | st->bucket++) { | 46 | st->bucket++) { |
46 | if (!hlist_empty(&nf_conntrack_hash[st->bucket])) | 47 | n = rcu_dereference(nf_conntrack_hash[st->bucket].first); |
47 | return nf_conntrack_hash[st->bucket].first; | 48 | if (n) |
49 | return n; | ||
48 | } | 50 | } |
49 | return NULL; | 51 | return NULL; |
50 | } | 52 | } |
@@ -54,11 +56,11 @@ static struct hlist_node *ct_get_next(struct seq_file *seq, | |||
54 | { | 56 | { |
55 | struct ct_iter_state *st = seq->private; | 57 | struct ct_iter_state *st = seq->private; |
56 | 58 | ||
57 | head = head->next; | 59 | head = rcu_dereference(head->next); |
58 | while (head == NULL) { | 60 | while (head == NULL) { |
59 | if (++st->bucket >= nf_conntrack_htable_size) | 61 | if (++st->bucket >= nf_conntrack_htable_size) |
60 | return NULL; | 62 | return NULL; |
61 | head = nf_conntrack_hash[st->bucket].first; | 63 | head = rcu_dereference(nf_conntrack_hash[st->bucket].first); |
62 | } | 64 | } |
63 | return head; | 65 | return head; |
64 | } | 66 | } |
@@ -74,8 +76,9 @@ static struct hlist_node *ct_get_idx(struct seq_file *seq, loff_t pos) | |||
74 | } | 76 | } |
75 | 77 | ||
76 | static void *ct_seq_start(struct seq_file *seq, loff_t *pos) | 78 | static void *ct_seq_start(struct seq_file *seq, loff_t *pos) |
79 | __acquires(RCU) | ||
77 | { | 80 | { |
78 | read_lock_bh(&nf_conntrack_lock); | 81 | rcu_read_lock(); |
79 | return ct_get_idx(seq, *pos); | 82 | return ct_get_idx(seq, *pos); |
80 | } | 83 | } |
81 | 84 | ||
@@ -86,16 +89,17 @@ static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos) | |||
86 | } | 89 | } |
87 | 90 | ||
88 | static void ct_seq_stop(struct seq_file *s, void *v) | 91 | static void ct_seq_stop(struct seq_file *s, void *v) |
92 | __releases(RCU) | ||
89 | { | 93 | { |
90 | read_unlock_bh(&nf_conntrack_lock); | 94 | rcu_read_unlock(); |
91 | } | 95 | } |
92 | 96 | ||
93 | static int ct_seq_show(struct seq_file *s, void *v) | 97 | static int ct_seq_show(struct seq_file *s, void *v) |
94 | { | 98 | { |
95 | const struct nf_conntrack_tuple_hash *hash = v; | 99 | const struct nf_conntrack_tuple_hash *hash = v; |
96 | const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); | 100 | const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); |
97 | struct nf_conntrack_l3proto *l3proto; | 101 | const struct nf_conntrack_l3proto *l3proto; |
98 | struct nf_conntrack_l4proto *l4proto; | 102 | const struct nf_conntrack_l4proto *l4proto; |
99 | 103 | ||
100 | NF_CT_ASSERT(ct); | 104 | NF_CT_ASSERT(ct); |
101 | 105 | ||
@@ -191,10 +195,12 @@ struct ct_expect_iter_state { | |||
191 | static struct hlist_node *ct_expect_get_first(struct seq_file *seq) | 195 | static struct hlist_node *ct_expect_get_first(struct seq_file *seq) |
192 | { | 196 | { |
193 | struct ct_expect_iter_state *st = seq->private; | 197 | struct ct_expect_iter_state *st = seq->private; |
198 | struct hlist_node *n; | ||
194 | 199 | ||
195 | for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { | 200 | for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { |
196 | if (!hlist_empty(&nf_ct_expect_hash[st->bucket])) | 201 | n = rcu_dereference(nf_ct_expect_hash[st->bucket].first); |
197 | return nf_ct_expect_hash[st->bucket].first; | 202 | if (n) |
203 | return n; | ||
198 | } | 204 | } |
199 | return NULL; | 205 | return NULL; |
200 | } | 206 | } |
@@ -204,11 +210,11 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq, | |||
204 | { | 210 | { |
205 | struct ct_expect_iter_state *st = seq->private; | 211 | struct ct_expect_iter_state *st = seq->private; |
206 | 212 | ||
207 | head = head->next; | 213 | head = rcu_dereference(head->next); |
208 | while (head == NULL) { | 214 | while (head == NULL) { |
209 | if (++st->bucket >= nf_ct_expect_hsize) | 215 | if (++st->bucket >= nf_ct_expect_hsize) |
210 | return NULL; | 216 | return NULL; |
211 | head = nf_ct_expect_hash[st->bucket].first; | 217 | head = rcu_dereference(nf_ct_expect_hash[st->bucket].first); |
212 | } | 218 | } |
213 | return head; | 219 | return head; |
214 | } | 220 | } |
@@ -224,8 +230,9 @@ static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos) | |||
224 | } | 230 | } |
225 | 231 | ||
226 | static void *exp_seq_start(struct seq_file *seq, loff_t *pos) | 232 | static void *exp_seq_start(struct seq_file *seq, loff_t *pos) |
233 | __acquires(RCU) | ||
227 | { | 234 | { |
228 | read_lock_bh(&nf_conntrack_lock); | 235 | rcu_read_lock(); |
229 | return ct_expect_get_idx(seq, *pos); | 236 | return ct_expect_get_idx(seq, *pos); |
230 | } | 237 | } |
231 | 238 | ||
@@ -236,14 +243,15 @@ static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
236 | } | 243 | } |
237 | 244 | ||
238 | static void exp_seq_stop(struct seq_file *seq, void *v) | 245 | static void exp_seq_stop(struct seq_file *seq, void *v) |
246 | __releases(RCU) | ||
239 | { | 247 | { |
240 | read_unlock_bh(&nf_conntrack_lock); | 248 | rcu_read_unlock(); |
241 | } | 249 | } |
242 | 250 | ||
243 | static int exp_seq_show(struct seq_file *s, void *v) | 251 | static int exp_seq_show(struct seq_file *s, void *v) |
244 | { | 252 | { |
245 | struct nf_conntrack_expect *exp; | 253 | struct nf_conntrack_expect *exp; |
246 | struct hlist_node *n = v; | 254 | const struct hlist_node *n = v; |
247 | 255 | ||
248 | exp = hlist_entry(n, struct nf_conntrack_expect, hnode); | 256 | exp = hlist_entry(n, struct nf_conntrack_expect, hnode); |
249 | 257 | ||
@@ -324,7 +332,7 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v) | |||
324 | static int ct_cpu_seq_show(struct seq_file *seq, void *v) | 332 | static int ct_cpu_seq_show(struct seq_file *seq, void *v) |
325 | { | 333 | { |
326 | unsigned int nr_conntracks = atomic_read(&nf_conntrack_count); | 334 | unsigned int nr_conntracks = atomic_read(&nf_conntrack_count); |
327 | struct ip_conntrack_stat *st = v; | 335 | const struct ip_conntrack_stat *st = v; |
328 | 336 | ||
329 | if (v == SEQ_START_TOKEN) { | 337 | if (v == SEQ_START_TOKEN) { |
330 | seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n"); | 338 | seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n"); |
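
The seq-file conversion above swaps read_lock_bh(&nf_conntrack_lock) for RCU: readers walk the hash buckets under rcu_read_lock(), and every ->first/->next load goes through rcu_dereference() so the access cannot be torn or reordered against a concurrent _rcu insertion. The traversal skeleton, reduced to its essentials (table and size are illustrative):

#include <linux/list.h>
#include <linux/rcupdate.h>

#define EXAMPLE_HTABLE_SIZE 256		/* illustrative */
static struct hlist_head example_hash[EXAMPLE_HTABLE_SIZE];

/* Both helpers run under rcu_read_lock() held by the caller. */
static struct hlist_node *example_get_first(void)
{
	struct hlist_node *n;
	unsigned int bucket;

	for (bucket = 0; bucket < EXAMPLE_HTABLE_SIZE; bucket++) {
		n = rcu_dereference(example_hash[bucket].first);
		if (n)
			return n;
	}
	return NULL;
}

static struct hlist_node *example_get_next(struct hlist_node *head,
					   unsigned int *bucket)
{
	head = rcu_dereference(head->next);
	while (head == NULL) {
		if (++*bucket >= EXAMPLE_HTABLE_SIZE)
			return NULL;
		head = rcu_dereference(example_hash[*bucket].first);
	}
	return head;
}
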
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 4004a04c5510..6873fddb3529 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c | |||
@@ -26,7 +26,8 @@ static int icmp_pkt_to_tuple(const struct sk_buff *skb, | |||
26 | unsigned int dataoff, | 26 | unsigned int dataoff, |
27 | struct nf_conntrack_tuple *tuple) | 27 | struct nf_conntrack_tuple *tuple) |
28 | { | 28 | { |
29 | struct icmphdr _hdr, *hp; | 29 | const struct icmphdr *hp; |
30 | struct icmphdr _hdr; | ||
30 | 31 | ||
31 | hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); | 32 | hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); |
32 | if (hp == NULL) | 33 | if (hp == NULL) |
@@ -100,7 +101,7 @@ static int icmp_packet(struct nf_conn *ct, | |||
100 | } | 101 | } |
101 | 102 | ||
102 | /* Called when a new connection for this protocol found. */ | 103 | /* Called when a new connection for this protocol found. */ |
103 | static int icmp_new(struct nf_conn *conntrack, | 104 | static int icmp_new(struct nf_conn *ct, |
104 | const struct sk_buff *skb, unsigned int dataoff) | 105 | const struct sk_buff *skb, unsigned int dataoff) |
105 | { | 106 | { |
106 | static const u_int8_t valid_new[] = { | 107 | static const u_int8_t valid_new[] = { |
@@ -110,15 +111,15 @@ static int icmp_new(struct nf_conn *conntrack, | |||
110 | [ICMP_ADDRESS] = 1 | 111 | [ICMP_ADDRESS] = 1 |
111 | }; | 112 | }; |
112 | 113 | ||
113 | if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) | 114 | if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) |
114 | || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) { | 115 | || !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) { |
115 | /* Can't create a new ICMP `conn' with this. */ | 116 | /* Can't create a new ICMP `conn' with this. */ |
116 | pr_debug("icmp: can't create new conn with type %u\n", | 117 | pr_debug("icmp: can't create new conn with type %u\n", |
117 | conntrack->tuplehash[0].tuple.dst.u.icmp.type); | 118 | ct->tuplehash[0].tuple.dst.u.icmp.type); |
118 | NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple); | 119 | NF_CT_DUMP_TUPLE(&ct->tuplehash[0].tuple); |
119 | return 0; | 120 | return 0; |
120 | } | 121 | } |
121 | atomic_set(&conntrack->proto.icmp.count, 0); | 122 | atomic_set(&ct->proto.icmp.count, 0); |
122 | return 1; | 123 | return 1; |
123 | } | 124 | } |
124 | 125 | ||
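
The icmp_new() hunk above preserves a pattern worth spelling out: the ICMP type is bounds-checked against sizeof(valid_new) before it is used to index the lookup table, so an out-of-range type can never read past the array. The core of that idiom, as a sketch with illustrative type values:

#include <linux/types.h>

static bool example_icmp_type_ok(unsigned int type)
{
	/* illustrative subset; the real table lists the request types */
	static const u_int8_t valid[] = {
		[0]	= 1,
		[8]	= 1,
		[13]	= 1,
	};

	/* bounds check first: sizeof() is the element count for a u8[] */
	if (type >= sizeof(valid) || !valid[type])
		return false;
	return true;
}
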
@@ -129,8 +130,8 @@ icmp_error_message(struct sk_buff *skb, | |||
129 | unsigned int hooknum) | 130 | unsigned int hooknum) |
130 | { | 131 | { |
131 | struct nf_conntrack_tuple innertuple, origtuple; | 132 | struct nf_conntrack_tuple innertuple, origtuple; |
132 | struct nf_conntrack_l4proto *innerproto; | 133 | const struct nf_conntrack_l4proto *innerproto; |
133 | struct nf_conntrack_tuple_hash *h; | 134 | const struct nf_conntrack_tuple_hash *h; |
134 | 135 | ||
135 | NF_CT_ASSERT(skb->nfct == NULL); | 136 | NF_CT_ASSERT(skb->nfct == NULL); |
136 | 137 | ||
@@ -176,7 +177,8 @@ static int | |||
176 | icmp_error(struct sk_buff *skb, unsigned int dataoff, | 177 | icmp_error(struct sk_buff *skb, unsigned int dataoff, |
177 | enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum) | 178 | enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum) |
178 | { | 179 | { |
179 | struct icmphdr _ih, *icmph; | 180 | const struct icmphdr *icmph; |
181 | struct icmphdr _ih; | ||
180 | 182 | ||
181 | /* Not enough header? */ | 183 | /* Not enough header? */ |
182 | icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); | 184 | icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); |
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index e53ae1ef8f5e..dd07362d2b8f 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c | |||
@@ -31,7 +31,7 @@ | |||
31 | #include <net/netfilter/nf_conntrack_l3proto.h> | 31 | #include <net/netfilter/nf_conntrack_l3proto.h> |
32 | #include <net/netfilter/nf_conntrack_l4proto.h> | 32 | #include <net/netfilter/nf_conntrack_l4proto.h> |
33 | 33 | ||
34 | static DEFINE_RWLOCK(nf_nat_lock); | 34 | static DEFINE_SPINLOCK(nf_nat_lock); |
35 | 35 | ||
36 | static struct nf_conntrack_l3proto *l3proto __read_mostly; | 36 | static struct nf_conntrack_l3proto *l3proto __read_mostly; |
37 | 37 | ||
@@ -154,8 +154,8 @@ find_appropriate_src(const struct nf_conntrack_tuple *tuple, | |||
154 | struct nf_conn *ct; | 154 | struct nf_conn *ct; |
155 | struct hlist_node *n; | 155 | struct hlist_node *n; |
156 | 156 | ||
157 | read_lock_bh(&nf_nat_lock); | 157 | rcu_read_lock(); |
158 | hlist_for_each_entry(nat, n, &bysource[h], bysource) { | 158 | hlist_for_each_entry_rcu(nat, n, &bysource[h], bysource) { |
159 | ct = nat->ct; | 159 | ct = nat->ct; |
160 | if (same_src(ct, tuple)) { | 160 | if (same_src(ct, tuple)) { |
161 | /* Copy source part from reply tuple. */ | 161 | /* Copy source part from reply tuple. */ |
@@ -164,12 +164,12 @@ find_appropriate_src(const struct nf_conntrack_tuple *tuple, | |||
164 | result->dst = tuple->dst; | 164 | result->dst = tuple->dst; |
165 | 165 | ||
166 | if (in_range(result, range)) { | 166 | if (in_range(result, range)) { |
167 | read_unlock_bh(&nf_nat_lock); | 167 | rcu_read_unlock(); |
168 | return 1; | 168 | return 1; |
169 | } | 169 | } |
170 | } | 170 | } |
171 | } | 171 | } |
172 | read_unlock_bh(&nf_nat_lock); | 172 | rcu_read_unlock(); |
173 | return 0; | 173 | return 0; |
174 | } | 174 | } |
175 | 175 | ||
@@ -330,12 +330,12 @@ nf_nat_setup_info(struct nf_conn *ct, | |||
330 | unsigned int srchash; | 330 | unsigned int srchash; |
331 | 331 | ||
332 | srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | 332 | srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); |
333 | write_lock_bh(&nf_nat_lock); | 333 | spin_lock_bh(&nf_nat_lock); |
334 | /* nf_conntrack_alter_reply might re-allocate extension area */ | 334 | /* nf_conntrack_alter_reply might re-allocate extension area */ |
335 | nat = nfct_nat(ct); | 335 | nat = nfct_nat(ct); |
336 | nat->ct = ct; | 336 | nat->ct = ct; |
337 | hlist_add_head(&nat->bysource, &bysource[srchash]); | 337 | hlist_add_head_rcu(&nat->bysource, &bysource[srchash]); |
338 | write_unlock_bh(&nf_nat_lock); | 338 | spin_unlock_bh(&nf_nat_lock); |
339 | } | 339 | } |
340 | 340 | ||
341 | /* It's done. */ | 341 | /* It's done. */ |
@@ -521,14 +521,14 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto) | |||
521 | { | 521 | { |
522 | int ret = 0; | 522 | int ret = 0; |
523 | 523 | ||
524 | write_lock_bh(&nf_nat_lock); | 524 | spin_lock_bh(&nf_nat_lock); |
525 | if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) { | 525 | if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) { |
526 | ret = -EBUSY; | 526 | ret = -EBUSY; |
527 | goto out; | 527 | goto out; |
528 | } | 528 | } |
529 | rcu_assign_pointer(nf_nat_protos[proto->protonum], proto); | 529 | rcu_assign_pointer(nf_nat_protos[proto->protonum], proto); |
530 | out: | 530 | out: |
531 | write_unlock_bh(&nf_nat_lock); | 531 | spin_unlock_bh(&nf_nat_lock); |
532 | return ret; | 532 | return ret; |
533 | } | 533 | } |
534 | EXPORT_SYMBOL(nf_nat_protocol_register); | 534 | EXPORT_SYMBOL(nf_nat_protocol_register); |
@@ -536,10 +536,10 @@ EXPORT_SYMBOL(nf_nat_protocol_register); | |||
536 | /* No one stores the protocol anywhere; simply delete it. */ | 536 | /* No one stores the protocol anywhere; simply delete it. */ |
537 | void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto) | 537 | void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto) |
538 | { | 538 | { |
539 | write_lock_bh(&nf_nat_lock); | 539 | spin_lock_bh(&nf_nat_lock); |
540 | rcu_assign_pointer(nf_nat_protos[proto->protonum], | 540 | rcu_assign_pointer(nf_nat_protos[proto->protonum], |
541 | &nf_nat_unknown_protocol); | 541 | &nf_nat_unknown_protocol); |
542 | write_unlock_bh(&nf_nat_lock); | 542 | spin_unlock_bh(&nf_nat_lock); |
543 | synchronize_rcu(); | 543 | synchronize_rcu(); |
544 | } | 544 | } |
545 | EXPORT_SYMBOL(nf_nat_protocol_unregister); | 545 | EXPORT_SYMBOL(nf_nat_protocol_unregister); |
@@ -594,10 +594,10 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct) | |||
594 | 594 | ||
595 | NF_CT_ASSERT(nat->ct->status & IPS_NAT_DONE_MASK); | 595 | NF_CT_ASSERT(nat->ct->status & IPS_NAT_DONE_MASK); |
596 | 596 | ||
597 | write_lock_bh(&nf_nat_lock); | 597 | spin_lock_bh(&nf_nat_lock); |
598 | hlist_del(&nat->bysource); | 598 | hlist_del_rcu(&nat->bysource); |
599 | nat->ct = NULL; | 599 | nat->ct = NULL; |
600 | write_unlock_bh(&nf_nat_lock); | 600 | spin_unlock_bh(&nf_nat_lock); |
601 | } | 601 | } |
602 | 602 | ||
603 | static void nf_nat_move_storage(struct nf_conn *conntrack, void *old) | 603 | static void nf_nat_move_storage(struct nf_conn *conntrack, void *old) |
@@ -609,10 +609,10 @@ static void nf_nat_move_storage(struct nf_conn *conntrack, void *old) | |||
609 | if (!ct || !(ct->status & IPS_NAT_DONE_MASK)) | 609 | if (!ct || !(ct->status & IPS_NAT_DONE_MASK)) |
610 | return; | 610 | return; |
611 | 611 | ||
612 | write_lock_bh(&nf_nat_lock); | 612 | spin_lock_bh(&nf_nat_lock); |
613 | hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); | 613 | hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); |
614 | new_nat->ct = ct; | 614 | new_nat->ct = ct; |
615 | write_unlock_bh(&nf_nat_lock); | 615 | spin_unlock_bh(&nf_nat_lock); |
616 | } | 616 | } |
617 | 617 | ||
618 | static struct nf_ct_ext_type nat_extend __read_mostly = { | 618 | static struct nf_ct_ext_type nat_extend __read_mostly = { |
@@ -646,17 +646,13 @@ static int __init nf_nat_init(void) | |||
646 | } | 646 | } |
647 | 647 | ||
648 | /* Sew in builtin protocols. */ | 648 | /* Sew in builtin protocols. */ |
649 | write_lock_bh(&nf_nat_lock); | 649 | spin_lock_bh(&nf_nat_lock); |
650 | for (i = 0; i < MAX_IP_NAT_PROTO; i++) | 650 | for (i = 0; i < MAX_IP_NAT_PROTO; i++) |
651 | rcu_assign_pointer(nf_nat_protos[i], &nf_nat_unknown_protocol); | 651 | rcu_assign_pointer(nf_nat_protos[i], &nf_nat_unknown_protocol); |
652 | rcu_assign_pointer(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp); | 652 | rcu_assign_pointer(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp); |
653 | rcu_assign_pointer(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp); | 653 | rcu_assign_pointer(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp); |
654 | rcu_assign_pointer(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp); | 654 | rcu_assign_pointer(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp); |
655 | write_unlock_bh(&nf_nat_lock); | 655 | spin_unlock_bh(&nf_nat_lock); |
656 | |||
657 | for (i = 0; i < nf_nat_htable_size; i++) { | ||
658 | INIT_HLIST_HEAD(&bysource[i]); | ||
659 | } | ||
660 | 656 | ||
661 | /* Initialize fake conntrack so that NAT will skip it */ | 657 | /* Initialize fake conntrack so that NAT will skip it */ |
662 | nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK; | 658 | nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK; |
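
The nf_nat_core conversion above splits the old rwlock into two complementary mechanisms: lookups become lockless RCU readers, while the remaining mutators serialize on a plain spinlock and use the _rcu list primitives so concurrent readers always observe a consistent chain. In outline, using the four-argument hlist_for_each_entry_rcu() of this kernel generation (bucket count and entry type are illustrative):

#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct example_entry {
	struct hlist_node node;
};

static DEFINE_SPINLOCK(example_lock);
static struct hlist_head example_bysource[16];

/* Writer side: short exclusive section, RCU-aware insertion. */
static void example_add(struct example_entry *e, unsigned int hash)
{
	spin_lock_bh(&example_lock);
	hlist_add_head_rcu(&e->node, &example_bysource[hash & 15]);
	spin_unlock_bh(&example_lock);
}

/* Reader side: shares no lock with the writers at all. */
static bool example_present(unsigned int hash)
{
	struct example_entry *e;
	struct hlist_node *n;
	bool found = false;

	rcu_read_lock();
	hlist_for_each_entry_rcu(e, n, &example_bysource[hash & 15], node)
		found = true;
	rcu_read_unlock();
	return found;
}
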
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index a121989fdad7..ee47bf28c825 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c | |||
@@ -32,7 +32,8 @@ static int set_addr(struct sk_buff *skb, | |||
32 | __be32 ip; | 32 | __be32 ip; |
33 | __be16 port; | 33 | __be16 port; |
34 | } __attribute__ ((__packed__)) buf; | 34 | } __attribute__ ((__packed__)) buf; |
35 | struct tcphdr _tcph, *th; | 35 | const struct tcphdr *th; |
36 | struct tcphdr _tcph; | ||
36 | 37 | ||
37 | buf.ip = ip; | 38 | buf.ip = ip; |
38 | buf.port = port; | 39 | buf.port = port; |
@@ -99,7 +100,7 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, | |||
99 | unsigned char **data, | 100 | unsigned char **data, |
100 | TransportAddress *taddr, int count) | 101 | TransportAddress *taddr, int count) |
101 | { | 102 | { |
102 | struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; | 103 | const struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; |
103 | int dir = CTINFO2DIR(ctinfo); | 104 | int dir = CTINFO2DIR(ctinfo); |
104 | int i; | 105 | int i; |
105 | __be16 port; | 106 | __be16 port; |
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 4c0232842e75..ca57f47bbd25 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c | |||
@@ -44,8 +44,7 @@ adjust_tcp_sequence(u32 seq, | |||
44 | struct nf_nat_seq *this_way, *other_way; | 44 | struct nf_nat_seq *this_way, *other_way; |
45 | struct nf_conn_nat *nat = nfct_nat(ct); | 45 | struct nf_conn_nat *nat = nfct_nat(ct); |
46 | 46 | ||
47 | pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n", | 47 | pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n", seq, seq); |
48 | ntohl(seq), seq); | ||
49 | 48 | ||
50 | dir = CTINFO2DIR(ctinfo); | 49 | dir = CTINFO2DIR(ctinfo); |
51 | 50 | ||
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index e63b944a2ebb..3a1e6d6afc0a 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c | |||
@@ -40,11 +40,11 @@ MODULE_ALIAS("ip_nat_pptp"); | |||
40 | static void pptp_nat_expected(struct nf_conn *ct, | 40 | static void pptp_nat_expected(struct nf_conn *ct, |
41 | struct nf_conntrack_expect *exp) | 41 | struct nf_conntrack_expect *exp) |
42 | { | 42 | { |
43 | struct nf_conn *master = ct->master; | 43 | const struct nf_conn *master = ct->master; |
44 | struct nf_conntrack_expect *other_exp; | 44 | struct nf_conntrack_expect *other_exp; |
45 | struct nf_conntrack_tuple t; | 45 | struct nf_conntrack_tuple t; |
46 | struct nf_ct_pptp_master *ct_pptp_info; | 46 | const struct nf_ct_pptp_master *ct_pptp_info; |
47 | struct nf_nat_pptp *nat_pptp_info; | 47 | const struct nf_nat_pptp *nat_pptp_info; |
48 | struct nf_nat_range range; | 48 | struct nf_nat_range range; |
49 | 49 | ||
50 | ct_pptp_info = &nfct_help(master)->help.ct_pptp_info; | 50 | ct_pptp_info = &nfct_help(master)->help.ct_pptp_info; |
@@ -186,7 +186,7 @@ static void | |||
186 | pptp_exp_gre(struct nf_conntrack_expect *expect_orig, | 186 | pptp_exp_gre(struct nf_conntrack_expect *expect_orig, |
187 | struct nf_conntrack_expect *expect_reply) | 187 | struct nf_conntrack_expect *expect_reply) |
188 | { | 188 | { |
189 | struct nf_conn *ct = expect_orig->master; | 189 | const struct nf_conn *ct = expect_orig->master; |
190 | struct nf_ct_pptp_master *ct_pptp_info; | 190 | struct nf_ct_pptp_master *ct_pptp_info; |
191 | struct nf_nat_pptp *nat_pptp_info; | 191 | struct nf_nat_pptp *nat_pptp_info; |
192 | 192 | ||
@@ -217,7 +217,7 @@ pptp_inbound_pkt(struct sk_buff *skb, | |||
217 | struct PptpControlHeader *ctlh, | 217 | struct PptpControlHeader *ctlh, |
218 | union pptp_ctrl_union *pptpReq) | 218 | union pptp_ctrl_union *pptpReq) |
219 | { | 219 | { |
220 | struct nf_nat_pptp *nat_pptp_info; | 220 | const struct nf_nat_pptp *nat_pptp_info; |
221 | u_int16_t msg; | 221 | u_int16_t msg; |
222 | __be16 new_pcid; | 222 | __be16 new_pcid; |
223 | unsigned int pcid_off; | 223 | unsigned int pcid_off; |
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index 9fa272e73113..a1e4da16da2e 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c | |||
@@ -59,7 +59,7 @@ static int | |||
59 | gre_unique_tuple(struct nf_conntrack_tuple *tuple, | 59 | gre_unique_tuple(struct nf_conntrack_tuple *tuple, |
60 | const struct nf_nat_range *range, | 60 | const struct nf_nat_range *range, |
61 | enum nf_nat_manip_type maniptype, | 61 | enum nf_nat_manip_type maniptype, |
62 | const struct nf_conn *conntrack) | 62 | const struct nf_conn *ct) |
63 | { | 63 | { |
64 | static u_int16_t key; | 64 | static u_int16_t key; |
65 | __be16 *keyptr; | 65 | __be16 *keyptr; |
@@ -67,7 +67,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
67 | 67 | ||
68 | /* If there is no master conntrack we are not PPTP, | 68 | /* If there is no master conntrack we are not PPTP, |
69 | do not change tuples */ | 69 | do not change tuples */ |
70 | if (!conntrack->master) | 70 | if (!ct->master) |
71 | return 0; | 71 | return 0; |
72 | 72 | ||
73 | if (maniptype == IP_NAT_MANIP_SRC) | 73 | if (maniptype == IP_NAT_MANIP_SRC) |
@@ -76,7 +76,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
76 | keyptr = &tuple->dst.u.gre.key; | 76 | keyptr = &tuple->dst.u.gre.key; |
77 | 77 | ||
78 | if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { | 78 | if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { |
79 | pr_debug("%p: NATing GRE PPTP\n", conntrack); | 79 | pr_debug("%p: NATing GRE PPTP\n", ct); |
80 | min = 1; | 80 | min = 1; |
81 | range_size = 0xffff; | 81 | range_size = 0xffff; |
82 | } else { | 82 | } else { |
@@ -88,11 +88,11 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
88 | 88 | ||
89 | for (i = 0; i < range_size; i++, key++) { | 89 | for (i = 0; i < range_size; i++, key++) { |
90 | *keyptr = htons(min + key % range_size); | 90 | *keyptr = htons(min + key % range_size); |
91 | if (!nf_nat_used_tuple(tuple, conntrack)) | 91 | if (!nf_nat_used_tuple(tuple, ct)) |
92 | return 1; | 92 | return 1; |
93 | } | 93 | } |
94 | 94 | ||
95 | pr_debug("%p: no NAT mapping\n", conntrack); | 95 | pr_debug("%p: no NAT mapping\n", ct); |
96 | return 0; | 96 | return 0; |
97 | } | 97 | } |
98 | 98 | ||
@@ -104,7 +104,7 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, | |||
104 | { | 104 | { |
105 | struct gre_hdr *greh; | 105 | struct gre_hdr *greh; |
106 | struct gre_hdr_pptp *pgreh; | 106 | struct gre_hdr_pptp *pgreh; |
107 | struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); | 107 | const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); |
108 | unsigned int hdroff = iphdroff + iph->ihl * 4; | 108 | unsigned int hdroff = iphdroff + iph->ihl * 4; |
109 | 109 | ||
110 | /* pgreh includes two optional 32bit fields which are not required | 110 | /* pgreh includes two optional 32bit fields which are not required |
@@ -148,12 +148,12 @@ static const struct nf_nat_protocol gre = { | |||
148 | #endif | 148 | #endif |
149 | }; | 149 | }; |
150 | 150 | ||
151 | int __init nf_nat_proto_gre_init(void) | 151 | static int __init nf_nat_proto_gre_init(void) |
152 | { | 152 | { |
153 | return nf_nat_protocol_register(&gre); | 153 | return nf_nat_protocol_register(&gre); |
154 | } | 154 | } |
155 | 155 | ||
156 | void __exit nf_nat_proto_gre_fini(void) | 156 | static void __exit nf_nat_proto_gre_fini(void) |
157 | { | 157 | { |
158 | nf_nat_protocol_unregister(&gre); | 158 | nf_nat_protocol_unregister(&gre); |
159 | } | 159 | } |
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c index a0e44c953cb6..03a02969aa57 100644 --- a/net/ipv4/netfilter/nf_nat_proto_icmp.c +++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c | |||
@@ -57,7 +57,7 @@ icmp_manip_pkt(struct sk_buff *skb, | |||
57 | const struct nf_conntrack_tuple *tuple, | 57 | const struct nf_conntrack_tuple *tuple, |
58 | enum nf_nat_manip_type maniptype) | 58 | enum nf_nat_manip_type maniptype) |
59 | { | 59 | { |
60 | struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); | 60 | const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); |
61 | struct icmphdr *hdr; | 61 | struct icmphdr *hdr; |
62 | unsigned int hdroff = iphdroff + iph->ihl*4; | 62 | unsigned int hdroff = iphdroff + iph->ihl*4; |
63 | 63 | ||
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c index da23e9fbe679..ffd5d1589eca 100644 --- a/net/ipv4/netfilter/nf_nat_proto_tcp.c +++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c | |||
@@ -93,7 +93,7 @@ tcp_manip_pkt(struct sk_buff *skb, | |||
93 | const struct nf_conntrack_tuple *tuple, | 93 | const struct nf_conntrack_tuple *tuple, |
94 | enum nf_nat_manip_type maniptype) | 94 | enum nf_nat_manip_type maniptype) |
95 | { | 95 | { |
96 | struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); | 96 | const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); |
97 | struct tcphdr *hdr; | 97 | struct tcphdr *hdr; |
98 | unsigned int hdroff = iphdroff + iph->ihl*4; | 98 | unsigned int hdroff = iphdroff + iph->ihl*4; |
99 | __be32 oldip, newip; | 99 | __be32 oldip, newip; |
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c index 10df4db078af..4b8f49910ff2 100644 --- a/net/ipv4/netfilter/nf_nat_proto_udp.c +++ b/net/ipv4/netfilter/nf_nat_proto_udp.c | |||
@@ -91,7 +91,7 @@ udp_manip_pkt(struct sk_buff *skb, | |||
91 | const struct nf_conntrack_tuple *tuple, | 91 | const struct nf_conntrack_tuple *tuple, |
92 | enum nf_nat_manip_type maniptype) | 92 | enum nf_nat_manip_type maniptype) |
93 | { | 93 | { |
94 | struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); | 94 | const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); |
95 | struct udphdr *hdr; | 95 | struct udphdr *hdr; |
96 | unsigned int hdroff = iphdroff + iph->ihl*4; | 96 | unsigned int hdroff = iphdroff + iph->ihl*4; |
97 | __be32 oldip, newip; | 97 | __be32 oldip, newip; |
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 519182269e76..f8fda57ba20b 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c | |||
@@ -58,13 +58,14 @@ static struct | |||
58 | .term = IPT_ERROR_INIT, /* ERROR */ | 58 | .term = IPT_ERROR_INIT, /* ERROR */ |
59 | }; | 59 | }; |
60 | 60 | ||
61 | static struct xt_table nat_table = { | 61 | static struct xt_table __nat_table = { |
62 | .name = "nat", | 62 | .name = "nat", |
63 | .valid_hooks = NAT_VALID_HOOKS, | 63 | .valid_hooks = NAT_VALID_HOOKS, |
64 | .lock = RW_LOCK_UNLOCKED, | 64 | .lock = RW_LOCK_UNLOCKED, |
65 | .me = THIS_MODULE, | 65 | .me = THIS_MODULE, |
66 | .af = AF_INET, | 66 | .af = AF_INET, |
67 | }; | 67 | }; |
68 | static struct xt_table *nat_table; | ||
68 | 69 | ||
69 | /* Source NAT */ | 70 | /* Source NAT */ |
70 | static unsigned int ipt_snat_target(struct sk_buff *skb, | 71 | static unsigned int ipt_snat_target(struct sk_buff *skb, |
@@ -214,7 +215,7 @@ int nf_nat_rule_find(struct sk_buff *skb, | |||
214 | { | 215 | { |
215 | int ret; | 216 | int ret; |
216 | 217 | ||
217 | ret = ipt_do_table(skb, hooknum, in, out, &nat_table); | 218 | ret = ipt_do_table(skb, hooknum, in, out, nat_table); |
218 | 219 | ||
219 | if (ret == NF_ACCEPT) { | 220 | if (ret == NF_ACCEPT) { |
220 | if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) | 221 | if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) |
@@ -248,9 +249,10 @@ int __init nf_nat_rule_init(void) | |||
248 | { | 249 | { |
249 | int ret; | 250 | int ret; |
250 | 251 | ||
251 | ret = ipt_register_table(&nat_table, &nat_initial_table.repl); | 252 | nat_table = ipt_register_table(&init_net, &__nat_table, |
252 | if (ret != 0) | 253 | &nat_initial_table.repl); |
253 | return ret; | 254 | if (IS_ERR(nat_table)) |
255 | return PTR_ERR(nat_table); | ||
254 | ret = xt_register_target(&ipt_snat_reg); | 256 | ret = xt_register_target(&ipt_snat_reg); |
255 | if (ret != 0) | 257 | if (ret != 0) |
256 | goto unregister_table; | 258 | goto unregister_table; |
@@ -264,7 +266,7 @@ int __init nf_nat_rule_init(void) | |||
264 | unregister_snat: | 266 | unregister_snat: |
265 | xt_unregister_target(&ipt_snat_reg); | 267 | xt_unregister_target(&ipt_snat_reg); |
266 | unregister_table: | 268 | unregister_table: |
267 | ipt_unregister_table(&nat_table); | 269 | ipt_unregister_table(nat_table); |
268 | 270 | ||
269 | return ret; | 271 | return ret; |
270 | } | 272 | } |
@@ -273,5 +275,5 @@ void nf_nat_rule_cleanup(void) | |||
273 | { | 275 | { |
274 | xt_unregister_target(&ipt_dnat_reg); | 276 | xt_unregister_target(&ipt_dnat_reg); |
275 | xt_unregister_target(&ipt_snat_reg); | 277 | xt_unregister_target(&ipt_snat_reg); |
276 | ipt_unregister_table(&nat_table); | 278 | ipt_unregister_table(nat_table); |
277 | } | 279 | } |
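
nf_nat_rule shows the caller-side shape of the new registration API: the static struct xt_table shrinks to a template (renamed __nat_table), and every later call goes through the pointer the registration returned. Condensed into a sketch, where example_initial stands in for the usual initial-table blob and the other names are likewise illustrative:

static struct xt_table example_template = {
	.name		= "example",
	.me		= THIS_MODULE,
	.af		= AF_INET,
};
static struct xt_table *example_table;	/* the live, registered instance */

static int example_rule_init(void)
{
	example_table = ipt_register_table(&init_net, &example_template,
					   &example_initial.repl);
	if (IS_ERR(example_table))
		return PTR_ERR(example_table);
	return 0;
}

static void example_rule_cleanup(void)
{
	ipt_unregister_table(example_table);
}
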
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index 606a170bf4ca..b4c8d4968bb2 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c | |||
@@ -35,9 +35,9 @@ struct addr_map { | |||
35 | } addr[IP_CT_DIR_MAX]; | 35 | } addr[IP_CT_DIR_MAX]; |
36 | }; | 36 | }; |
37 | 37 | ||
38 | static void addr_map_init(struct nf_conn *ct, struct addr_map *map) | 38 | static void addr_map_init(const struct nf_conn *ct, struct addr_map *map) |
39 | { | 39 | { |
40 | struct nf_conntrack_tuple *t; | 40 | const struct nf_conntrack_tuple *t; |
41 | enum ip_conntrack_dir dir; | 41 | enum ip_conntrack_dir dir; |
42 | unsigned int n; | 42 | unsigned int n; |
43 | 43 | ||
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 07f2a49926d4..540ce6ae887c 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c | |||
@@ -260,7 +260,7 @@ static unsigned char asn1_eoc_decode(struct asn1_ctx *ctx, unsigned char *eoc) | |||
260 | { | 260 | { |
261 | unsigned char ch; | 261 | unsigned char ch; |
262 | 262 | ||
263 | if (eoc == 0) { | 263 | if (eoc == NULL) { |
264 | if (!asn1_octet_decode(ctx, &ch)) | 264 | if (!asn1_octet_decode(ctx, &ch)) |
265 | return 0; | 265 | return 0; |
266 | 266 | ||
diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c index 1360a94766dd..b096e81500ae 100644 --- a/net/ipv4/netfilter/nf_nat_tftp.c +++ b/net/ipv4/netfilter/nf_nat_tftp.c | |||
@@ -24,7 +24,7 @@ static unsigned int help(struct sk_buff *skb, | |||
24 | enum ip_conntrack_info ctinfo, | 24 | enum ip_conntrack_info ctinfo, |
25 | struct nf_conntrack_expect *exp) | 25 | struct nf_conntrack_expect *exp) |
26 | { | 26 | { |
27 | struct nf_conn *ct = exp->master; | 27 | const struct nf_conn *ct = exp->master; |
28 | 28 | ||
29 | exp->saved_proto.udp.port | 29 | exp->saved_proto.udp.port |
30 | = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; | 30 | = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; |
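
Several of the NAT helper hunks above are const-correctness sweeps: parameters the helpers only read, such as the master conntrack and its tuples, gain a const qualifier. A tiny sketch of why that helps (struct conn and its fields are hypothetical):

    #include <stdio.h>

    struct conn { unsigned short src_port, dst_port; };

    /* Declaring the parameter 'const struct conn *' documents that the
     * helper only reads the connection, lets callers pass pointers to
     * const data without a cast, and makes the compiler reject writes. */
    static unsigned short saved_src_port(const struct conn *ct)
    {
            return ct->src_port;
    }

    int main(void)
    {
            const struct conn ct = { .src_port = 69, .dst_port = 1069 };

            printf("%u\n", saved_src_port(&ct));
            return 0;
    }
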
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 85c08696abbe..a3002fe65b7f 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
@@ -352,6 +352,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, | |||
352 | skb_reserve(skb, hh_len); | 352 | skb_reserve(skb, hh_len); |
353 | 353 | ||
354 | skb->priority = sk->sk_priority; | 354 | skb->priority = sk->sk_priority; |
355 | skb->mark = sk->sk_mark; | ||
355 | skb->dst = dst_clone(&rt->u.dst); | 356 | skb->dst = dst_clone(&rt->u.dst); |
356 | 357 | ||
357 | skb_reset_network_header(skb); | 358 | skb_reset_network_header(skb); |
@@ -544,6 +545,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
544 | 545 | ||
545 | { | 546 | { |
546 | struct flowi fl = { .oif = ipc.oif, | 547 | struct flowi fl = { .oif = ipc.oif, |
548 | .mark = sk->sk_mark, | ||
547 | .nl_u = { .ip4_u = | 549 | .nl_u = { .ip4_u = |
548 | { .daddr = daddr, | 550 | { .daddr = daddr, |
549 | .saddr = saddr, | 551 | .saddr = saddr, |
@@ -860,8 +862,7 @@ static struct sock *raw_get_first(struct seq_file *seq) | |||
860 | struct hlist_node *node; | 862 | struct hlist_node *node; |
861 | 863 | ||
862 | sk_for_each(sk, node, &state->h->ht[state->bucket]) | 864 | sk_for_each(sk, node, &state->h->ht[state->bucket]) |
863 | if (sk->sk_net == state->p.net && | 865 | if (sk->sk_net == state->p.net) |
864 | sk->sk_family == state->family) | ||
865 | goto found; | 866 | goto found; |
866 | } | 867 | } |
867 | sk = NULL; | 868 | sk = NULL; |
@@ -877,8 +878,7 @@ static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk) | |||
877 | sk = sk_next(sk); | 878 | sk = sk_next(sk); |
878 | try_again: | 879 | try_again: |
879 | ; | 880 | ; |
880 | } while (sk && sk->sk_net != state->p.net && | 881 | } while (sk && sk->sk_net != state->p.net); |
881 | sk->sk_family != state->family); | ||
882 | 882 | ||
883 | if (!sk && ++state->bucket < RAW_HTABLE_SIZE) { | 883 | if (!sk && ++state->bucket < RAW_HTABLE_SIZE) { |
884 | sk = sk_head(&state->h->ht[state->bucket]); | 884 | sk = sk_head(&state->h->ht[state->bucket]); |
@@ -927,7 +927,7 @@ void raw_seq_stop(struct seq_file *seq, void *v) | |||
927 | } | 927 | } |
928 | EXPORT_SYMBOL_GPL(raw_seq_stop); | 928 | EXPORT_SYMBOL_GPL(raw_seq_stop); |
929 | 929 | ||
930 | static __inline__ char *get_raw_sock(struct sock *sp, char *tmpbuf, int i) | 930 | static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) |
931 | { | 931 | { |
932 | struct inet_sock *inet = inet_sk(sp); | 932 | struct inet_sock *inet = inet_sk(sp); |
933 | __be32 dest = inet->daddr, | 933 | __be32 dest = inet->daddr, |
@@ -935,33 +935,23 @@ static __inline__ char *get_raw_sock(struct sock *sp, char *tmpbuf, int i) | |||
935 | __u16 destp = 0, | 935 | __u16 destp = 0, |
936 | srcp = inet->num; | 936 | srcp = inet->num; |
937 | 937 | ||
938 | sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" | 938 | seq_printf(seq, "%4d: %08X:%04X %08X:%04X" |
939 | " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d", | 939 | " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d", |
940 | i, src, srcp, dest, destp, sp->sk_state, | 940 | i, src, srcp, dest, destp, sp->sk_state, |
941 | atomic_read(&sp->sk_wmem_alloc), | 941 | atomic_read(&sp->sk_wmem_alloc), |
942 | atomic_read(&sp->sk_rmem_alloc), | 942 | atomic_read(&sp->sk_rmem_alloc), |
943 | 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), | 943 | 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), |
944 | atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); | 944 | atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); |
945 | return tmpbuf; | ||
946 | } | 945 | } |
947 | 946 | ||
948 | #define TMPSZ 128 | ||
949 | |||
950 | static int raw_seq_show(struct seq_file *seq, void *v) | 947 | static int raw_seq_show(struct seq_file *seq, void *v) |
951 | { | 948 | { |
952 | char tmpbuf[TMPSZ+1]; | ||
953 | |||
954 | if (v == SEQ_START_TOKEN) | 949 | if (v == SEQ_START_TOKEN) |
955 | seq_printf(seq, "%-*s\n", TMPSZ-1, | 950 | seq_printf(seq, " sl local_address rem_address st tx_queue " |
956 | " sl local_address rem_address st tx_queue " | 951 | "rx_queue tr tm->when retrnsmt uid timeout " |
957 | "rx_queue tr tm->when retrnsmt uid timeout " | 952 | "inode drops\n"); |
958 | "inode drops"); | 953 | else |
959 | else { | 954 | raw_sock_seq_show(seq, v, raw_seq_private(seq)->bucket); |
960 | struct raw_iter_state *state = raw_seq_private(seq); | ||
961 | |||
962 | seq_printf(seq, "%-*s\n", TMPSZ-1, | ||
963 | get_raw_sock(v, tmpbuf, state->bucket)); | ||
964 | } | ||
965 | return 0; | 955 | return 0; |
966 | } | 956 | } |
967 | 957 | ||
@@ -972,27 +962,25 @@ static const struct seq_operations raw_seq_ops = { | |||
972 | .show = raw_seq_show, | 962 | .show = raw_seq_show, |
973 | }; | 963 | }; |
974 | 964 | ||
975 | int raw_seq_open(struct inode *ino, struct file *file, struct raw_hashinfo *h, | 965 | int raw_seq_open(struct inode *ino, struct file *file, |
976 | unsigned short family) | 966 | struct raw_hashinfo *h, const struct seq_operations *ops) |
977 | { | 967 | { |
978 | int err; | 968 | int err; |
979 | struct raw_iter_state *i; | 969 | struct raw_iter_state *i; |
980 | 970 | ||
981 | err = seq_open_net(ino, file, &raw_seq_ops, | 971 | err = seq_open_net(ino, file, ops, sizeof(struct raw_iter_state)); |
982 | sizeof(struct raw_iter_state)); | ||
983 | if (err < 0) | 972 | if (err < 0) |
984 | return err; | 973 | return err; |
985 | 974 | ||
986 | i = raw_seq_private((struct seq_file *)file->private_data); | 975 | i = raw_seq_private((struct seq_file *)file->private_data); |
987 | i->h = h; | 976 | i->h = h; |
988 | i->family = family; | ||
989 | return 0; | 977 | return 0; |
990 | } | 978 | } |
991 | EXPORT_SYMBOL_GPL(raw_seq_open); | 979 | EXPORT_SYMBOL_GPL(raw_seq_open); |
992 | 980 | ||
993 | static int raw_v4_seq_open(struct inode *inode, struct file *file) | 981 | static int raw_v4_seq_open(struct inode *inode, struct file *file) |
994 | { | 982 | { |
995 | return raw_seq_open(inode, file, &raw_v4_hashinfo, PF_INET); | 983 | return raw_seq_open(inode, file, &raw_v4_hashinfo, &raw_seq_ops); |
996 | } | 984 | } |
997 | 985 | ||
998 | static const struct file_operations raw_seq_fops = { | 986 | static const struct file_operations raw_seq_fops = { |
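
The raw.c /proc refactor above drops the TMPSZ scratch buffer: instead of sprintf()ing each row into a 128-byte stack array and printing the buffer, raw_sock_seq_show() formats straight into the seq_file with seq_printf(), so widening a row can no longer overrun or truncate a fixed array. A userspace sketch of the same shape, with FILE * standing in for struct seq_file (names are illustrative):

    #include <stdio.h>

    struct sock_stub { int state, uid; };

    /* Format directly into the output sink rather than a fixed-size
     * intermediate buffer; the helper no longer needs to return a
     * string for the caller to print. */
    static void sock_seq_show(FILE *out, const struct sock_stub *sp, int bucket)
    {
            fprintf(out, "%4d: st=%02X uid=%5d\n", bucket, sp->state, sp->uid);
    }

    int main(void)
    {
            struct sock_stub sp = { .state = 7, .uid = 0 };

            sock_seq_show(stdout, &sp, 3);
            return 0;
    }
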
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 896c768e41a2..8842ecb9be48 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -117,8 +117,6 @@ | |||
117 | 117 | ||
118 | #define RT_GC_TIMEOUT (300*HZ) | 118 | #define RT_GC_TIMEOUT (300*HZ) |
119 | 119 | ||
120 | static int ip_rt_min_delay = 2 * HZ; | ||
121 | static int ip_rt_max_delay = 10 * HZ; | ||
122 | static int ip_rt_max_size; | 120 | static int ip_rt_max_size; |
123 | static int ip_rt_gc_timeout = RT_GC_TIMEOUT; | 121 | static int ip_rt_gc_timeout = RT_GC_TIMEOUT; |
124 | static int ip_rt_gc_interval = 60 * HZ; | 122 | static int ip_rt_gc_interval = 60 * HZ; |
@@ -133,12 +131,9 @@ static int ip_rt_mtu_expires = 10 * 60 * HZ; | |||
133 | static int ip_rt_min_pmtu = 512 + 20 + 20; | 131 | static int ip_rt_min_pmtu = 512 + 20 + 20; |
134 | static int ip_rt_min_advmss = 256; | 132 | static int ip_rt_min_advmss = 256; |
135 | static int ip_rt_secret_interval = 10 * 60 * HZ; | 133 | static int ip_rt_secret_interval = 10 * 60 * HZ; |
136 | static int ip_rt_flush_expected; | ||
137 | static unsigned long rt_deadline; | ||
138 | 134 | ||
139 | #define RTprint(a...) printk(KERN_DEBUG a) | 135 | #define RTprint(a...) printk(KERN_DEBUG a) |
140 | 136 | ||
141 | static struct timer_list rt_flush_timer; | ||
142 | static void rt_worker_func(struct work_struct *work); | 137 | static void rt_worker_func(struct work_struct *work); |
143 | static DECLARE_DELAYED_WORK(expires_work, rt_worker_func); | 138 | static DECLARE_DELAYED_WORK(expires_work, rt_worker_func); |
144 | static struct timer_list rt_secret_timer; | 139 | static struct timer_list rt_secret_timer; |
@@ -169,6 +164,7 @@ static struct dst_ops ipv4_dst_ops = { | |||
169 | .update_pmtu = ip_rt_update_pmtu, | 164 | .update_pmtu = ip_rt_update_pmtu, |
170 | .local_out = ip_local_out, | 165 | .local_out = ip_local_out, |
171 | .entry_size = sizeof(struct rtable), | 166 | .entry_size = sizeof(struct rtable), |
167 | .entries = ATOMIC_INIT(0), | ||
172 | }; | 168 | }; |
173 | 169 | ||
174 | #define ECN_OR_COST(class) TC_PRIO_##class | 170 | #define ECN_OR_COST(class) TC_PRIO_##class |
@@ -259,19 +255,16 @@ static inline void rt_hash_lock_init(void) | |||
259 | static struct rt_hash_bucket *rt_hash_table; | 255 | static struct rt_hash_bucket *rt_hash_table; |
260 | static unsigned rt_hash_mask; | 256 | static unsigned rt_hash_mask; |
261 | static unsigned int rt_hash_log; | 257 | static unsigned int rt_hash_log; |
262 | static unsigned int rt_hash_rnd; | 258 | static atomic_t rt_genid; |
263 | 259 | ||
264 | static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); | 260 | static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); |
265 | #define RT_CACHE_STAT_INC(field) \ | 261 | #define RT_CACHE_STAT_INC(field) \ |
266 | (__raw_get_cpu_var(rt_cache_stat).field++) | 262 | (__raw_get_cpu_var(rt_cache_stat).field++) |
267 | 263 | ||
268 | static int rt_intern_hash(unsigned hash, struct rtable *rth, | ||
269 | struct rtable **res); | ||
270 | |||
271 | static unsigned int rt_hash_code(u32 daddr, u32 saddr) | 264 | static unsigned int rt_hash_code(u32 daddr, u32 saddr) |
272 | { | 265 | { |
273 | return (jhash_2words(daddr, saddr, rt_hash_rnd) | 266 | return jhash_2words(daddr, saddr, atomic_read(&rt_genid)) |
274 | & rt_hash_mask); | 267 | & rt_hash_mask; |
275 | } | 268 | } |
276 | 269 | ||
277 | #define rt_hash(daddr, saddr, idx) \ | 270 | #define rt_hash(daddr, saddr, idx) \ |
@@ -281,27 +274,28 @@ static unsigned int rt_hash_code(u32 daddr, u32 saddr) | |||
281 | #ifdef CONFIG_PROC_FS | 274 | #ifdef CONFIG_PROC_FS |
282 | struct rt_cache_iter_state { | 275 | struct rt_cache_iter_state { |
283 | int bucket; | 276 | int bucket; |
277 | int genid; | ||
284 | }; | 278 | }; |
285 | 279 | ||
286 | static struct rtable *rt_cache_get_first(struct seq_file *seq) | 280 | static struct rtable *rt_cache_get_first(struct rt_cache_iter_state *st) |
287 | { | 281 | { |
288 | struct rtable *r = NULL; | 282 | struct rtable *r = NULL; |
289 | struct rt_cache_iter_state *st = seq->private; | ||
290 | 283 | ||
291 | for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { | 284 | for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { |
292 | rcu_read_lock_bh(); | 285 | rcu_read_lock_bh(); |
293 | r = rt_hash_table[st->bucket].chain; | 286 | r = rcu_dereference(rt_hash_table[st->bucket].chain); |
294 | if (r) | 287 | while (r) { |
295 | break; | 288 | if (r->rt_genid == st->genid) |
289 | return r; | ||
290 | r = rcu_dereference(r->u.dst.rt_next); | ||
291 | } | ||
296 | rcu_read_unlock_bh(); | 292 | rcu_read_unlock_bh(); |
297 | } | 293 | } |
298 | return rcu_dereference(r); | 294 | return r; |
299 | } | 295 | } |
300 | 296 | ||
301 | static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r) | 297 | static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct rtable *r) |
302 | { | 298 | { |
303 | struct rt_cache_iter_state *st = seq->private; | ||
304 | |||
305 | r = r->u.dst.rt_next; | 299 | r = r->u.dst.rt_next; |
306 | while (!r) { | 300 | while (!r) { |
307 | rcu_read_unlock_bh(); | 301 | rcu_read_unlock_bh(); |
@@ -313,29 +307,38 @@ static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r) | |||
313 | return rcu_dereference(r); | 307 | return rcu_dereference(r); |
314 | } | 308 | } |
315 | 309 | ||
316 | static struct rtable *rt_cache_get_idx(struct seq_file *seq, loff_t pos) | 310 | static struct rtable *rt_cache_get_idx(struct rt_cache_iter_state *st, loff_t pos) |
317 | { | 311 | { |
318 | struct rtable *r = rt_cache_get_first(seq); | 312 | struct rtable *r = rt_cache_get_first(st); |
319 | 313 | ||
320 | if (r) | 314 | if (r) |
321 | while (pos && (r = rt_cache_get_next(seq, r))) | 315 | while (pos && (r = rt_cache_get_next(st, r))) { |
316 | if (r->rt_genid != st->genid) | ||
317 | continue; | ||
322 | --pos; | 318 | --pos; |
319 | } | ||
323 | return pos ? NULL : r; | 320 | return pos ? NULL : r; |
324 | } | 321 | } |
325 | 322 | ||
326 | static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) | 323 | static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) |
327 | { | 324 | { |
328 | return *pos ? rt_cache_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; | 325 | struct rt_cache_iter_state *st = seq->private; |
326 | |||
327 | if (*pos) | ||
328 | return rt_cache_get_idx(st, *pos - 1); | ||
329 | st->genid = atomic_read(&rt_genid); | ||
330 | return SEQ_START_TOKEN; | ||
329 | } | 331 | } |
330 | 332 | ||
331 | static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 333 | static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
332 | { | 334 | { |
333 | struct rtable *r = NULL; | 335 | struct rtable *r; |
336 | struct rt_cache_iter_state *st = seq->private; | ||
334 | 337 | ||
335 | if (v == SEQ_START_TOKEN) | 338 | if (v == SEQ_START_TOKEN) |
336 | r = rt_cache_get_first(seq); | 339 | r = rt_cache_get_first(st); |
337 | else | 340 | else |
338 | r = rt_cache_get_next(seq, v); | 341 | r = rt_cache_get_next(st, v); |
339 | ++*pos; | 342 | ++*pos; |
340 | return r; | 343 | return r; |
341 | } | 344 | } |
@@ -708,6 +711,11 @@ static void rt_check_expire(void) | |||
708 | continue; | 711 | continue; |
709 | spin_lock_bh(rt_hash_lock_addr(i)); | 712 | spin_lock_bh(rt_hash_lock_addr(i)); |
710 | while ((rth = *rthp) != NULL) { | 713 | while ((rth = *rthp) != NULL) { |
714 | if (rth->rt_genid != atomic_read(&rt_genid)) { | ||
715 | *rthp = rth->u.dst.rt_next; | ||
716 | rt_free(rth); | ||
717 | continue; | ||
718 | } | ||
711 | if (rth->u.dst.expires) { | 719 | if (rth->u.dst.expires) { |
712 | /* Entry is expired even if it is in use */ | 720 | /* Entry is expired even if it is in use */ |
713 | if (time_before_eq(jiffies, rth->u.dst.expires)) { | 721 | if (time_before_eq(jiffies, rth->u.dst.expires)) { |
@@ -732,83 +740,45 @@ static void rt_check_expire(void) | |||
732 | 740 | ||
733 | /* | 741 | /* |
734 | * rt_worker_func() is run in process context. | 742 | * rt_worker_func() is run in process context. |
736 | * If a whole flush was scheduled, it is done. | 743 | * We call rt_check_expire() to scan part of the hash table |
736 | * Else, we call rt_check_expire() to scan part of the hash table | ||
737 | */ | 744 | */ |
738 | static void rt_worker_func(struct work_struct *work) | 745 | static void rt_worker_func(struct work_struct *work) |
739 | { | 746 | { |
740 | if (ip_rt_flush_expected) { | 747 | rt_check_expire(); |
741 | ip_rt_flush_expected = 0; | ||
742 | rt_do_flush(1); | ||
743 | } else | ||
744 | rt_check_expire(); | ||
745 | schedule_delayed_work(&expires_work, ip_rt_gc_interval); | 748 | schedule_delayed_work(&expires_work, ip_rt_gc_interval); |
746 | } | 749 | } |
747 | 750 | ||
748 | /* This can run from both BH and non-BH contexts, the latter | 751 | /* |
749 | * in the case of a forced flush event. | 752 | * Perturbation of rt_genid by a small quantity [1..256] |
753 | * Using 8 bits of shuffling ensures we can call rt_cache_invalidate() | ||
754 | * many times (2^24) without reusing a recent rt_genid. | ||
755 | * Jenkins hash is strong enough that little changes of rt_genid are OK. | ||
750 | */ | 756 | */ |
751 | static void rt_run_flush(unsigned long process_context) | 757 | static void rt_cache_invalidate(void) |
752 | { | 758 | { |
753 | rt_deadline = 0; | 759 | unsigned char shuffle; |
754 | |||
755 | get_random_bytes(&rt_hash_rnd, 4); | ||
756 | 760 | ||
757 | rt_do_flush(process_context); | 761 | get_random_bytes(&shuffle, sizeof(shuffle)); |
762 | atomic_add(shuffle + 1U, &rt_genid); | ||
758 | } | 763 | } |
759 | 764 | ||
760 | static DEFINE_SPINLOCK(rt_flush_lock); | 765 | /* |
761 | 766 | * delay < 0 : invalidate cache (fast : entries will be deleted later) | |
767 | * delay >= 0 : invalidate & flush cache (can be long) | ||
768 | */ | ||
762 | void rt_cache_flush(int delay) | 769 | void rt_cache_flush(int delay) |
763 | { | 770 | { |
764 | unsigned long now = jiffies; | 771 | rt_cache_invalidate(); |
765 | int user_mode = !in_softirq(); | 772 | if (delay >= 0) |
766 | 773 | rt_do_flush(!in_softirq()); | |
767 | if (delay < 0) | ||
768 | delay = ip_rt_min_delay; | ||
769 | |||
770 | spin_lock_bh(&rt_flush_lock); | ||
771 | |||
772 | if (del_timer(&rt_flush_timer) && delay > 0 && rt_deadline) { | ||
773 | long tmo = (long)(rt_deadline - now); | ||
774 | |||
775 | /* If flush timer is already running | ||
776 | and flush request is not immediate (delay > 0): | ||
777 | |||
778 | if deadline is not achieved, prolongate timer to "delay", | ||
779 | otherwise fire it at deadline time. | ||
780 | */ | ||
781 | |||
782 | if (user_mode && tmo < ip_rt_max_delay-ip_rt_min_delay) | ||
783 | tmo = 0; | ||
784 | |||
785 | if (delay > tmo) | ||
786 | delay = tmo; | ||
787 | } | ||
788 | |||
789 | if (delay <= 0) { | ||
790 | spin_unlock_bh(&rt_flush_lock); | ||
791 | rt_run_flush(user_mode); | ||
792 | return; | ||
793 | } | ||
794 | |||
795 | if (rt_deadline == 0) | ||
796 | rt_deadline = now + ip_rt_max_delay; | ||
797 | |||
798 | mod_timer(&rt_flush_timer, now+delay); | ||
799 | spin_unlock_bh(&rt_flush_lock); | ||
800 | } | 774 | } |
801 | 775 | ||
802 | /* | 776 | /* |
803 | * We change rt_hash_rnd and ask next rt_worker_func() invocation | 777 | * We change rt_genid and let gc do the cleanup |
804 | * to perform a flush in process context | ||
805 | */ | 778 | */ |
806 | static void rt_secret_rebuild(unsigned long dummy) | 779 | static void rt_secret_rebuild(unsigned long dummy) |
807 | { | 780 | { |
808 | get_random_bytes(&rt_hash_rnd, 4); | 781 | rt_cache_invalidate(); |
809 | ip_rt_flush_expected = 1; | ||
810 | cancel_delayed_work(&expires_work); | ||
811 | schedule_delayed_work(&expires_work, HZ/10); | ||
812 | mod_timer(&rt_secret_timer, jiffies + ip_rt_secret_interval); | 782 | mod_timer(&rt_secret_timer, jiffies + ip_rt_secret_interval); |
813 | } | 783 | } |
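
The route.c hunks above retire the timer-driven flush machinery (rt_flush_timer, rt_deadline, and the min_delay/max_delay sysctls removed further down) in favour of a generation counter: rt_genid seeds the hash and is stamped into every rtable, so invalidating the cache is a single counter bump, and stale entries are skipped on lookup and reaped lazily. A minimal sketch of that pattern, with illustrative names rather than the kernel API:

    #include <stdio.h>
    #include <stdlib.h>

    /* Every cached entry records the generation it was created under.
     * Invalidation is O(1): bump the generation by a random value in
     * [1..256], as rt_cache_invalidate() does, and every existing
     * entry goes stale at once; readers just compare generations. */
    static int cache_genid;

    struct entry {
            int genid;
            int key;
    };

    static void cache_invalidate(void)
    {
            cache_genid += (rand() & 0xFF) + 1;     /* shuffle in [1..256] */
    }

    static int entry_valid(const struct entry *e)
    {
            return e->genid == cache_genid;
    }

    int main(void)
    {
            struct entry e = { .genid = cache_genid, .key = 42 };

            printf("before flush: %s\n", entry_valid(&e) ? "hit" : "stale");
            cache_invalidate();             /* whole cache dies at once */
            printf("after flush:  %s\n", entry_valid(&e) ? "hit" : "stale");
            return 0;
    }
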
814 | 784 | ||
@@ -885,7 +855,8 @@ static int rt_garbage_collect(struct dst_ops *ops) | |||
885 | rthp = &rt_hash_table[k].chain; | 855 | rthp = &rt_hash_table[k].chain; |
886 | spin_lock_bh(rt_hash_lock_addr(k)); | 856 | spin_lock_bh(rt_hash_lock_addr(k)); |
887 | while ((rth = *rthp) != NULL) { | 857 | while ((rth = *rthp) != NULL) { |
888 | if (!rt_may_expire(rth, tmo, expire)) { | 858 | if (rth->rt_genid == atomic_read(&rt_genid) && |
859 | !rt_may_expire(rth, tmo, expire)) { | ||
889 | tmo >>= 1; | 860 | tmo >>= 1; |
890 | rthp = &rth->u.dst.rt_next; | 861 | rthp = &rth->u.dst.rt_next; |
891 | continue; | 862 | continue; |
@@ -966,6 +937,11 @@ restart: | |||
966 | 937 | ||
967 | spin_lock_bh(rt_hash_lock_addr(hash)); | 938 | spin_lock_bh(rt_hash_lock_addr(hash)); |
968 | while ((rth = *rthp) != NULL) { | 939 | while ((rth = *rthp) != NULL) { |
940 | if (rth->rt_genid != atomic_read(&rt_genid)) { | ||
941 | *rthp = rth->u.dst.rt_next; | ||
942 | rt_free(rth); | ||
943 | continue; | ||
944 | } | ||
969 | if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) { | 945 | if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) { |
970 | /* Put it first */ | 946 | /* Put it first */ |
971 | *rthp = rth->u.dst.rt_next; | 947 | *rthp = rth->u.dst.rt_next; |
@@ -1131,17 +1107,19 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) | |||
1131 | 1107 | ||
1132 | static void rt_del(unsigned hash, struct rtable *rt) | 1108 | static void rt_del(unsigned hash, struct rtable *rt) |
1133 | { | 1109 | { |
1134 | struct rtable **rthp; | 1110 | struct rtable **rthp, *aux; |
1135 | 1111 | ||
1112 | rthp = &rt_hash_table[hash].chain; | ||
1136 | spin_lock_bh(rt_hash_lock_addr(hash)); | 1113 | spin_lock_bh(rt_hash_lock_addr(hash)); |
1137 | ip_rt_put(rt); | 1114 | ip_rt_put(rt); |
1138 | for (rthp = &rt_hash_table[hash].chain; *rthp; | 1115 | while ((aux = *rthp) != NULL) { |
1139 | rthp = &(*rthp)->u.dst.rt_next) | 1116 | if (aux == rt || (aux->rt_genid != atomic_read(&rt_genid))) { |
1140 | if (*rthp == rt) { | 1117 | *rthp = aux->u.dst.rt_next; |
1141 | *rthp = rt->u.dst.rt_next; | 1118 | rt_free(aux); |
1142 | rt_free(rt); | 1119 | continue; |
1143 | break; | ||
1144 | } | 1120 | } |
1121 | rthp = &aux->u.dst.rt_next; | ||
1122 | } | ||
1145 | spin_unlock_bh(rt_hash_lock_addr(hash)); | 1123 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
1146 | } | 1124 | } |
1147 | 1125 | ||
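
The rt_del() rewrite above walks the chain with a pointer-to-pointer, so removing any node, including the head, is one assignment with no special case, and the same pass can now also purge entries whose generation went stale. A self-contained sketch of the idiom (the list and its names are illustrative):

    #include <stdio.h>
    #include <stdlib.h>

    struct node {
            int val;
            struct node *next;
    };

    /* *pp is always the link that points at the current node, so
     * unlinking is '*pp = cur->next' whether cur is the head or not. */
    static void unlink_matching(struct node **pp, int val)
    {
            struct node *cur;

            while ((cur = *pp) != NULL) {
                    if (cur->val == val) {
                            *pp = cur->next;        /* splice it out */
                            free(cur);
                            continue;
                    }
                    pp = &cur->next;                /* advance the link */
            }
    }

    int main(void)
    {
            struct node *head = NULL, **pp = &head;
            int vals[] = { 1, 2, 1, 3 };

            for (unsigned i = 0; i < sizeof(vals) / sizeof(vals[0]); i++) {
                    struct node *n = malloc(sizeof(*n));

                    n->val = vals[i];
                    n->next = NULL;
                    *pp = n;
                    pp = &n->next;
            }
            unlink_matching(&head, 1);
            for (struct node *n = head; n; n = n->next)
                    printf("%d ", n->val);          /* prints: 2 3 */
            printf("\n");
            return 0;
    }
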
@@ -1186,7 +1164,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1186 | if (rth->fl.fl4_dst != daddr || | 1164 | if (rth->fl.fl4_dst != daddr || |
1187 | rth->fl.fl4_src != skeys[i] || | 1165 | rth->fl.fl4_src != skeys[i] || |
1188 | rth->fl.oif != ikeys[k] || | 1166 | rth->fl.oif != ikeys[k] || |
1189 | rth->fl.iif != 0) { | 1167 | rth->fl.iif != 0 || |
1168 | rth->rt_genid != atomic_read(&rt_genid)) { | ||
1190 | rthp = &rth->u.dst.rt_next; | 1169 | rthp = &rth->u.dst.rt_next; |
1191 | continue; | 1170 | continue; |
1192 | } | 1171 | } |
@@ -1224,7 +1203,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1224 | rt->u.dst.neighbour = NULL; | 1203 | rt->u.dst.neighbour = NULL; |
1225 | rt->u.dst.hh = NULL; | 1204 | rt->u.dst.hh = NULL; |
1226 | rt->u.dst.xfrm = NULL; | 1205 | rt->u.dst.xfrm = NULL; |
1227 | 1206 | rt->rt_genid = atomic_read(&rt_genid); | |
1228 | rt->rt_flags |= RTCF_REDIRECTED; | 1207 | rt->rt_flags |= RTCF_REDIRECTED; |
1229 | 1208 | ||
1230 | /* Gateway is different ... */ | 1209 | /* Gateway is different ... */ |
@@ -1445,7 +1424,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, | |||
1445 | rth->rt_src == iph->saddr && | 1424 | rth->rt_src == iph->saddr && |
1446 | rth->fl.iif == 0 && | 1425 | rth->fl.iif == 0 && |
1447 | !(dst_metric_locked(&rth->u.dst, RTAX_MTU)) && | 1426 | !(dst_metric_locked(&rth->u.dst, RTAX_MTU)) && |
1448 | rth->u.dst.dev->nd_net == net) { | 1427 | rth->u.dst.dev->nd_net == net && |
1428 | rth->rt_genid == atomic_read(&rt_genid)) { | ||
1449 | unsigned short mtu = new_mtu; | 1429 | unsigned short mtu = new_mtu; |
1450 | 1430 | ||
1451 | if (new_mtu < 68 || new_mtu >= old_mtu) { | 1431 | if (new_mtu < 68 || new_mtu >= old_mtu) { |
@@ -1680,8 +1660,9 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
1680 | rth->fl.oif = 0; | 1660 | rth->fl.oif = 0; |
1681 | rth->rt_gateway = daddr; | 1661 | rth->rt_gateway = daddr; |
1682 | rth->rt_spec_dst= spec_dst; | 1662 | rth->rt_spec_dst= spec_dst; |
1683 | rth->rt_type = RTN_MULTICAST; | 1663 | rth->rt_genid = atomic_read(&rt_genid); |
1684 | rth->rt_flags = RTCF_MULTICAST; | 1664 | rth->rt_flags = RTCF_MULTICAST; |
1665 | rth->rt_type = RTN_MULTICAST; | ||
1685 | if (our) { | 1666 | if (our) { |
1686 | rth->u.dst.input= ip_local_deliver; | 1667 | rth->u.dst.input= ip_local_deliver; |
1687 | rth->rt_flags |= RTCF_LOCAL; | 1668 | rth->rt_flags |= RTCF_LOCAL; |
@@ -1820,6 +1801,7 @@ static inline int __mkroute_input(struct sk_buff *skb, | |||
1820 | 1801 | ||
1821 | rth->u.dst.input = ip_forward; | 1802 | rth->u.dst.input = ip_forward; |
1822 | rth->u.dst.output = ip_output; | 1803 | rth->u.dst.output = ip_output; |
1804 | rth->rt_genid = atomic_read(&rt_genid); | ||
1823 | 1805 | ||
1824 | rt_set_nexthop(rth, res, itag); | 1806 | rt_set_nexthop(rth, res, itag); |
1825 | 1807 | ||
@@ -1980,6 +1962,7 @@ local_input: | |||
1980 | goto e_nobufs; | 1962 | goto e_nobufs; |
1981 | 1963 | ||
1982 | rth->u.dst.output= ip_rt_bug; | 1964 | rth->u.dst.output= ip_rt_bug; |
1965 | rth->rt_genid = atomic_read(&rt_genid); | ||
1983 | 1966 | ||
1984 | atomic_set(&rth->u.dst.__refcnt, 1); | 1967 | atomic_set(&rth->u.dst.__refcnt, 1); |
1985 | rth->u.dst.flags= DST_HOST; | 1968 | rth->u.dst.flags= DST_HOST; |
@@ -2071,7 +2054,8 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2071 | rth->fl.oif == 0 && | 2054 | rth->fl.oif == 0 && |
2072 | rth->fl.mark == skb->mark && | 2055 | rth->fl.mark == skb->mark && |
2073 | rth->fl.fl4_tos == tos && | 2056 | rth->fl.fl4_tos == tos && |
2074 | rth->u.dst.dev->nd_net == net) { | 2057 | rth->u.dst.dev->nd_net == net && |
2058 | rth->rt_genid == atomic_read(&rt_genid)) { | ||
2075 | dst_use(&rth->u.dst, jiffies); | 2059 | dst_use(&rth->u.dst, jiffies); |
2076 | RT_CACHE_STAT_INC(in_hit); | 2060 | RT_CACHE_STAT_INC(in_hit); |
2077 | rcu_read_unlock(); | 2061 | rcu_read_unlock(); |
@@ -2199,6 +2183,7 @@ static inline int __mkroute_output(struct rtable **result, | |||
2199 | rth->rt_spec_dst= fl->fl4_src; | 2183 | rth->rt_spec_dst= fl->fl4_src; |
2200 | 2184 | ||
2201 | rth->u.dst.output=ip_output; | 2185 | rth->u.dst.output=ip_output; |
2186 | rth->rt_genid = atomic_read(&rt_genid); | ||
2202 | 2187 | ||
2203 | RT_CACHE_STAT_INC(out_slow_tot); | 2188 | RT_CACHE_STAT_INC(out_slow_tot); |
2204 | 2189 | ||
@@ -2471,7 +2456,8 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, | |||
2471 | rth->fl.mark == flp->mark && | 2456 | rth->fl.mark == flp->mark && |
2472 | !((rth->fl.fl4_tos ^ flp->fl4_tos) & | 2457 | !((rth->fl.fl4_tos ^ flp->fl4_tos) & |
2473 | (IPTOS_RT_MASK | RTO_ONLINK)) && | 2458 | (IPTOS_RT_MASK | RTO_ONLINK)) && |
2474 | rth->u.dst.dev->nd_net == net) { | 2459 | rth->u.dst.dev->nd_net == net && |
2460 | rth->rt_genid == atomic_read(&rt_genid)) { | ||
2475 | dst_use(&rth->u.dst, jiffies); | 2461 | dst_use(&rth->u.dst, jiffies); |
2476 | RT_CACHE_STAT_INC(out_hit); | 2462 | RT_CACHE_STAT_INC(out_hit); |
2477 | rcu_read_unlock_bh(); | 2463 | rcu_read_unlock_bh(); |
@@ -2498,6 +2484,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { | |||
2498 | .check = ipv4_dst_check, | 2484 | .check = ipv4_dst_check, |
2499 | .update_pmtu = ipv4_rt_blackhole_update_pmtu, | 2485 | .update_pmtu = ipv4_rt_blackhole_update_pmtu, |
2500 | .entry_size = sizeof(struct rtable), | 2486 | .entry_size = sizeof(struct rtable), |
2487 | .entries = ATOMIC_INIT(0), | ||
2501 | }; | 2488 | }; |
2502 | 2489 | ||
2503 | 2490 | ||
@@ -2525,6 +2512,7 @@ static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock | |||
2525 | rt->idev = ort->idev; | 2512 | rt->idev = ort->idev; |
2526 | if (rt->idev) | 2513 | if (rt->idev) |
2527 | in_dev_hold(rt->idev); | 2514 | in_dev_hold(rt->idev); |
2515 | rt->rt_genid = atomic_read(&rt_genid); | ||
2528 | rt->rt_flags = ort->rt_flags; | 2516 | rt->rt_flags = ort->rt_flags; |
2529 | rt->rt_type = ort->rt_type; | 2517 | rt->rt_type = ort->rt_type; |
2530 | rt->rt_dst = ort->rt_dst; | 2518 | rt->rt_dst = ort->rt_dst; |
@@ -2779,6 +2767,8 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
2779 | rt = rcu_dereference(rt->u.dst.rt_next), idx++) { | 2767 | rt = rcu_dereference(rt->u.dst.rt_next), idx++) { |
2780 | if (idx < s_idx) | 2768 | if (idx < s_idx) |
2781 | continue; | 2769 | continue; |
2770 | if (rt->rt_genid != atomic_read(&rt_genid)) | ||
2771 | continue; | ||
2782 | skb->dst = dst_clone(&rt->u.dst); | 2772 | skb->dst = dst_clone(&rt->u.dst); |
2783 | if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid, | 2773 | if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid, |
2784 | cb->nlh->nlmsg_seq, RTM_NEWROUTE, | 2774 | cb->nlh->nlmsg_seq, RTM_NEWROUTE, |
@@ -2848,24 +2838,6 @@ ctl_table ipv4_route_table[] = { | |||
2848 | .strategy = &ipv4_sysctl_rtcache_flush_strategy, | 2838 | .strategy = &ipv4_sysctl_rtcache_flush_strategy, |
2849 | }, | 2839 | }, |
2850 | { | 2840 | { |
2851 | .ctl_name = NET_IPV4_ROUTE_MIN_DELAY, | ||
2852 | .procname = "min_delay", | ||
2853 | .data = &ip_rt_min_delay, | ||
2854 | .maxlen = sizeof(int), | ||
2855 | .mode = 0644, | ||
2856 | .proc_handler = &proc_dointvec_jiffies, | ||
2857 | .strategy = &sysctl_jiffies, | ||
2858 | }, | ||
2859 | { | ||
2860 | .ctl_name = NET_IPV4_ROUTE_MAX_DELAY, | ||
2861 | .procname = "max_delay", | ||
2862 | .data = &ip_rt_max_delay, | ||
2863 | .maxlen = sizeof(int), | ||
2864 | .mode = 0644, | ||
2865 | .proc_handler = &proc_dointvec_jiffies, | ||
2866 | .strategy = &sysctl_jiffies, | ||
2867 | }, | ||
2868 | { | ||
2869 | .ctl_name = NET_IPV4_ROUTE_GC_THRESH, | 2841 | .ctl_name = NET_IPV4_ROUTE_GC_THRESH, |
2870 | .procname = "gc_thresh", | 2842 | .procname = "gc_thresh", |
2871 | .data = &ipv4_dst_ops.gc_thresh, | 2843 | .data = &ipv4_dst_ops.gc_thresh, |
@@ -3023,8 +2995,8 @@ int __init ip_rt_init(void) | |||
3023 | { | 2995 | { |
3024 | int rc = 0; | 2996 | int rc = 0; |
3025 | 2997 | ||
3026 | rt_hash_rnd = (int) ((num_physpages ^ (num_physpages>>8)) ^ | 2998 | atomic_set(&rt_genid, (int) ((num_physpages ^ (num_physpages>>8)) ^ |
3027 | (jiffies ^ (jiffies >> 7))); | 2999 | (jiffies ^ (jiffies >> 7)))); |
3028 | 3000 | ||
3029 | #ifdef CONFIG_NET_CLS_ROUTE | 3001 | #ifdef CONFIG_NET_CLS_ROUTE |
3030 | ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct)); | 3002 | ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct)); |
@@ -3057,7 +3029,6 @@ int __init ip_rt_init(void) | |||
3057 | devinet_init(); | 3029 | devinet_init(); |
3058 | ip_fib_init(); | 3030 | ip_fib_init(); |
3059 | 3031 | ||
3060 | setup_timer(&rt_flush_timer, rt_run_flush, 0); | ||
3061 | setup_timer(&rt_secret_timer, rt_secret_rebuild, 0); | 3032 | setup_timer(&rt_secret_timer, rt_secret_rebuild, 0); |
3062 | 3033 | ||
3063 | /* All the timers, started at system startup tend | 3034 | /* All the timers, started at system startup tend |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 82cdf23837e3..88286f35d1e2 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -185,7 +185,7 @@ static int strategy_allowed_congestion_control(ctl_table *table, int __user *nam | |||
185 | 185 | ||
186 | tcp_get_available_congestion_control(tbl.data, tbl.maxlen); | 186 | tcp_get_available_congestion_control(tbl.data, tbl.maxlen); |
187 | ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen); | 187 | ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen); |
188 | if (ret == 0 && newval && newlen) | 188 | if (ret == 1 && newval && newlen) |
189 | ret = tcp_set_allowed_congestion_control(tbl.data); | 189 | ret = tcp_set_allowed_congestion_control(tbl.data); |
190 | kfree(tbl.data); | 190 | kfree(tbl.data); |
191 | 191 | ||
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fa2c85ca5bc3..19c449f62672 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -2153,7 +2153,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int fast_rexmit) | |||
2153 | tp->lost_skb_hint = skb; | 2153 | tp->lost_skb_hint = skb; |
2154 | tp->lost_cnt_hint = cnt; | 2154 | tp->lost_cnt_hint = cnt; |
2155 | 2155 | ||
2156 | if (tcp_is_fack(tp) || | 2156 | if (tcp_is_fack(tp) || tcp_is_reno(tp) || |
2157 | (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) | 2157 | (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) |
2158 | cnt += tcp_skb_pcount(skb); | 2158 | cnt += tcp_skb_pcount(skb); |
2159 | 2159 | ||
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 9aea88b8d4fc..77c1939a2b0d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -369,8 +369,8 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) | |||
369 | return; | 369 | return; |
370 | } | 370 | } |
371 | 371 | ||
372 | sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr, | 372 | sk = inet_lookup(skb->dev->nd_net, &tcp_hashinfo, iph->daddr, th->dest, |
373 | th->source, inet_iif(skb)); | 373 | iph->saddr, th->source, inet_iif(skb)); |
374 | if (!sk) { | 374 | if (!sk) { |
375 | ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); | 375 | ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); |
376 | return; | 376 | return; |
@@ -1503,8 +1503,8 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) | |||
1503 | if (req) | 1503 | if (req) |
1504 | return tcp_check_req(sk, skb, req, prev); | 1504 | return tcp_check_req(sk, skb, req, prev); |
1505 | 1505 | ||
1506 | nsk = inet_lookup_established(&tcp_hashinfo, iph->saddr, th->source, | 1506 | nsk = inet_lookup_established(sk->sk_net, &tcp_hashinfo, iph->saddr, |
1507 | iph->daddr, th->dest, inet_iif(skb)); | 1507 | th->source, iph->daddr, th->dest, inet_iif(skb)); |
1508 | 1508 | ||
1509 | if (nsk) { | 1509 | if (nsk) { |
1510 | if (nsk->sk_state != TCP_TIME_WAIT) { | 1510 | if (nsk->sk_state != TCP_TIME_WAIT) { |
@@ -1661,8 +1661,8 @@ int tcp_v4_rcv(struct sk_buff *skb) | |||
1661 | TCP_SKB_CB(skb)->flags = iph->tos; | 1661 | TCP_SKB_CB(skb)->flags = iph->tos; |
1662 | TCP_SKB_CB(skb)->sacked = 0; | 1662 | TCP_SKB_CB(skb)->sacked = 0; |
1663 | 1663 | ||
1664 | sk = __inet_lookup(&tcp_hashinfo, iph->saddr, th->source, | 1664 | sk = __inet_lookup(skb->dev->nd_net, &tcp_hashinfo, iph->saddr, |
1665 | iph->daddr, th->dest, inet_iif(skb)); | 1665 | th->source, iph->daddr, th->dest, inet_iif(skb)); |
1666 | if (!sk) | 1666 | if (!sk) |
1667 | goto no_tcp_socket; | 1667 | goto no_tcp_socket; |
1668 | 1668 | ||
@@ -1735,7 +1735,8 @@ do_time_wait: | |||
1735 | } | 1735 | } |
1736 | switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { | 1736 | switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { |
1737 | case TCP_TW_SYN: { | 1737 | case TCP_TW_SYN: { |
1738 | struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, | 1738 | struct sock *sk2 = inet_lookup_listener(skb->dev->nd_net, |
1739 | &tcp_hashinfo, | ||
1739 | iph->daddr, th->dest, | 1740 | iph->daddr, th->dest, |
1740 | inet_iif(skb)); | 1741 | inet_iif(skb)); |
1741 | if (sk2) { | 1742 | if (sk2) { |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 89f0188885c7..ed750f9ceb07 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -2564,5 +2564,4 @@ EXPORT_SYMBOL(tcp_connect); | |||
2564 | EXPORT_SYMBOL(tcp_make_synack); | 2564 | EXPORT_SYMBOL(tcp_make_synack); |
2565 | EXPORT_SYMBOL(tcp_simple_retransmit); | 2565 | EXPORT_SYMBOL(tcp_simple_retransmit); |
2566 | EXPORT_SYMBOL(tcp_sync_mss); | 2566 | EXPORT_SYMBOL(tcp_sync_mss); |
2567 | EXPORT_SYMBOL(sysctl_tcp_tso_win_divisor); | ||
2568 | EXPORT_SYMBOL(tcp_mtup_init); | 2567 | EXPORT_SYMBOL(tcp_mtup_init); |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2fb8d731026b..7ea1b67b6de1 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -130,14 +130,14 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min); | |||
130 | atomic_t udp_memory_allocated; | 130 | atomic_t udp_memory_allocated; |
131 | EXPORT_SYMBOL(udp_memory_allocated); | 131 | EXPORT_SYMBOL(udp_memory_allocated); |
132 | 132 | ||
133 | static inline int __udp_lib_lport_inuse(__u16 num, | 133 | static inline int __udp_lib_lport_inuse(struct net *net, __u16 num, |
134 | const struct hlist_head udptable[]) | 134 | const struct hlist_head udptable[]) |
135 | { | 135 | { |
136 | struct sock *sk; | 136 | struct sock *sk; |
137 | struct hlist_node *node; | 137 | struct hlist_node *node; |
138 | 138 | ||
139 | sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) | 139 | sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) |
140 | if (sk->sk_hash == num) | 140 | if (sk->sk_net == net && sk->sk_hash == num) |
141 | return 1; | 141 | return 1; |
142 | return 0; | 142 | return 0; |
143 | } | 143 | } |
@@ -159,6 +159,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum, | |||
159 | struct hlist_head *head; | 159 | struct hlist_head *head; |
160 | struct sock *sk2; | 160 | struct sock *sk2; |
161 | int error = 1; | 161 | int error = 1; |
162 | struct net *net = sk->sk_net; | ||
162 | 163 | ||
163 | write_lock_bh(&udp_hash_lock); | 164 | write_lock_bh(&udp_hash_lock); |
164 | 165 | ||
@@ -198,7 +199,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum, | |||
198 | /* 2nd pass: find hole in shortest hash chain */ | 199 | /* 2nd pass: find hole in shortest hash chain */ |
199 | rover = best; | 200 | rover = best; |
200 | for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) { | 201 | for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) { |
201 | if (! __udp_lib_lport_inuse(rover, udptable)) | 202 | if (! __udp_lib_lport_inuse(net, rover, udptable)) |
202 | goto gotit; | 203 | goto gotit; |
203 | rover += UDP_HTABLE_SIZE; | 204 | rover += UDP_HTABLE_SIZE; |
204 | if (rover > high) | 205 | if (rover > high) |
@@ -218,6 +219,7 @@ gotit: | |||
218 | sk_for_each(sk2, node, head) | 219 | sk_for_each(sk2, node, head) |
219 | if (sk2->sk_hash == snum && | 220 | if (sk2->sk_hash == snum && |
220 | sk2 != sk && | 221 | sk2 != sk && |
222 | sk2->sk_net == net && | ||
221 | (!sk2->sk_reuse || !sk->sk_reuse) && | 223 | (!sk2->sk_reuse || !sk->sk_reuse) && |
222 | (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if | 224 | (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if |
223 | || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && | 225 | || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && |
@@ -261,9 +263,9 @@ static inline int udp_v4_get_port(struct sock *sk, unsigned short snum) | |||
261 | /* UDP is nearly always wildcards out the wazoo, it makes no sense to try | 263 | /* UDP is nearly always wildcards out the wazoo, it makes no sense to try |
262 | * harder than this. -DaveM | 264 | * harder than this. -DaveM |
263 | */ | 265 | */ |
264 | static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport, | 266 | static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, |
265 | __be32 daddr, __be16 dport, | 267 | __be16 sport, __be32 daddr, __be16 dport, |
266 | int dif, struct hlist_head udptable[]) | 268 | int dif, struct hlist_head udptable[]) |
267 | { | 269 | { |
268 | struct sock *sk, *result = NULL; | 270 | struct sock *sk, *result = NULL; |
269 | struct hlist_node *node; | 271 | struct hlist_node *node; |
@@ -274,7 +276,8 @@ static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport, | |||
274 | sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { | 276 | sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { |
275 | struct inet_sock *inet = inet_sk(sk); | 277 | struct inet_sock *inet = inet_sk(sk); |
276 | 278 | ||
277 | if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) { | 279 | if (sk->sk_net == net && sk->sk_hash == hnum && |
280 | !ipv6_only_sock(sk)) { | ||
278 | int score = (sk->sk_family == PF_INET ? 1 : 0); | 281 | int score = (sk->sk_family == PF_INET ? 1 : 0); |
279 | if (inet->rcv_saddr) { | 282 | if (inet->rcv_saddr) { |
280 | if (inet->rcv_saddr != daddr) | 283 | if (inet->rcv_saddr != daddr) |
@@ -361,8 +364,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) | |||
361 | int harderr; | 364 | int harderr; |
362 | int err; | 365 | int err; |
363 | 366 | ||
364 | sk = __udp4_lib_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, | 367 | sk = __udp4_lib_lookup(skb->dev->nd_net, iph->daddr, uh->dest, |
365 | skb->dev->ifindex, udptable ); | 368 | iph->saddr, uh->source, skb->dev->ifindex, udptable); |
366 | if (sk == NULL) { | 369 | if (sk == NULL) { |
367 | ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); | 370 | ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); |
368 | return; /* No socket for error */ | 371 | return; /* No socket for error */ |
@@ -1185,8 +1188,8 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], | |||
1185 | if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) | 1188 | if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) |
1186 | return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); | 1189 | return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); |
1187 | 1190 | ||
1188 | sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest, | 1191 | sk = __udp4_lib_lookup(skb->dev->nd_net, saddr, uh->source, daddr, |
1189 | inet_iif(skb), udptable); | 1192 | uh->dest, inet_iif(skb), udptable); |
1190 | 1193 | ||
1191 | if (sk != NULL) { | 1194 | if (sk != NULL) { |
1192 | int ret = 0; | 1195 | int ret = 0; |
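
The udp.c hunks above thread struct net through the port and socket lookups, so a bound port only counts as in use within its own namespace and two namespaces can bind the same port independently. A stand-in for that filter with simplified types (struct net here is just a tag, not the kernel structure):

    #include <stdio.h>

    struct net { int id; };

    struct sock_stub {
            const struct net *net;
            unsigned short hash;
            const struct sock_stub *next;
    };

    /* Mirrors the new __udp_lib_lport_inuse() check: a socket only
     * matches if it hashes to the port *and* belongs to the caller's
     * namespace. */
    static int lport_inuse(const struct net *net, unsigned short num,
                           const struct sock_stub *bucket)
    {
            for (const struct sock_stub *sk = bucket; sk; sk = sk->next)
                    if (sk->net == net && sk->hash == num)
                            return 1;
            return 0;
    }

    int main(void)
    {
            struct net net_a = { 1 }, net_b = { 2 };
            struct sock_stub sk = { .net = &net_a, .hash = 53, .next = NULL };

            printf("net_a port 53 in use: %d\n", lport_inuse(&net_a, 53, &sk));
            printf("net_b port 53 in use: %d\n", lport_inuse(&net_b, 53, &sk));
            return 0;
    }
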
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 3783e3ee56a4..10ed70491434 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c | |||
@@ -247,6 +247,7 @@ static struct dst_ops xfrm4_dst_ops = { | |||
247 | .local_out = __ip_local_out, | 247 | .local_out = __ip_local_out, |
248 | .gc_thresh = 1024, | 248 | .gc_thresh = 1024, |
249 | .entry_size = sizeof(struct xfrm_dst), | 249 | .entry_size = sizeof(struct xfrm_dst), |
250 | .entries = ATOMIC_INIT(0), | ||
250 | }; | 251 | }; |
251 | 252 | ||
252 | static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { | 253 | static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { |
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index 326845195620..41f5982d2087 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c | |||
@@ -38,7 +38,7 @@ static void ipip_destroy(struct xfrm_state *x) | |||
38 | { | 38 | { |
39 | } | 39 | } |
40 | 40 | ||
41 | static struct xfrm_type ipip_type = { | 41 | static const struct xfrm_type ipip_type = { |
42 | .description = "IPIP", | 42 | .description = "IPIP", |
43 | .owner = THIS_MODULE, | 43 | .owner = THIS_MODULE, |
44 | .proto = IPPROTO_IPIP, | 44 | .proto = IPPROTO_IPIP, |
@@ -50,7 +50,7 @@ static struct xfrm_type ipip_type = { | |||
50 | 50 | ||
51 | static int xfrm_tunnel_rcv(struct sk_buff *skb) | 51 | static int xfrm_tunnel_rcv(struct sk_buff *skb) |
52 | { | 52 | { |
53 | return xfrm4_rcv_spi(skb, IPPROTO_IP, ip_hdr(skb)->saddr); | 53 | return xfrm4_rcv_spi(skb, IPPROTO_IPIP, ip_hdr(skb)->saddr); |
54 | } | 54 | } |
55 | 55 | ||
56 | static int xfrm_tunnel_err(struct sk_buff *skb, u32 info) | 56 | static int xfrm_tunnel_err(struct sk_buff *skb, u32 info) |