aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/arp.c21
-rw-r--r--net/ipv4/devinet.c25
-rw-r--r--net/ipv4/esp4.c17
-rw-r--r--net/ipv4/fib_frontend.c4
-rw-r--r--net/ipv4/fib_semantics.c4
-rw-r--r--net/ipv4/fib_trie.c25
-rw-r--r--net/ipv4/icmp.c7
-rw-r--r--net/ipv4/igmp.c2
-rw-r--r--net/ipv4/inet_connection_sock.c2
-rw-r--r--net/ipv4/inet_timewait_sock.c7
-rw-r--r--net/ipv4/ip_gre.c4
-rw-r--r--net/ipv4/ip_output.c8
-rw-r--r--net/ipv4/ipmr.c6
-rw-r--r--net/ipv4/ipvs/ip_vs_app.c2
-rw-r--r--net/ipv4/netfilter/Kconfig21
-rw-r--r--net/ipv4/netfilter/Makefile7
-rw-r--r--net/ipv4/netfilter/arp_tables.c14
-rw-r--r--net/ipv4/netfilter/ip_conntrack_amanda.c2
-rw-r--r--net/ipv4/netfilter/ip_conntrack_core.c184
-rw-r--r--net/ipv4/netfilter/ip_conntrack_helper_pptp.c17
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netbios_ns.c4
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netlink.c48
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_gre.c1
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_icmp.c3
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_sctp.c1
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_tcp.c30
-rw-r--r--net/ipv4/netfilter/ip_conntrack_standalone.c2
-rw-r--r--net/ipv4/netfilter/ip_nat_core.c35
-rw-r--r--net/ipv4/netfilter/ip_nat_helper.c4
-rw-r--r--net/ipv4/netfilter/ip_nat_standalone.c25
-rw-r--r--net/ipv4/netfilter/ip_queue.c4
-rw-r--r--net/ipv4/netfilter/ip_tables.c17
-rw-r--r--net/ipv4/netfilter/ipt_REDIRECT.c2
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c4
-rw-r--r--net/ipv4/proc.c4
-rw-r--r--net/ipv4/route.c6
-rw-r--r--net/ipv4/tcp_bic.c2
-rw-r--r--net/ipv4/tcp_input.c3
-rw-r--r--net/ipv4/tcp_ipv4.c11
-rw-r--r--net/ipv4/tcp_output.c16
40 files changed, 392 insertions, 209 deletions
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 8bf312bdea13..b425748f02d7 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -241,7 +241,7 @@ static int arp_constructor(struct neighbour *neigh)
241 neigh->type = inet_addr_type(addr); 241 neigh->type = inet_addr_type(addr);
242 242
243 rcu_read_lock(); 243 rcu_read_lock();
244 in_dev = rcu_dereference(__in_dev_get(dev)); 244 in_dev = __in_dev_get_rcu(dev);
245 if (in_dev == NULL) { 245 if (in_dev == NULL) {
246 rcu_read_unlock(); 246 rcu_read_unlock();
247 return -EINVAL; 247 return -EINVAL;
@@ -697,12 +697,6 @@ void arp_send(int type, int ptype, u32 dest_ip,
697 arp_xmit(skb); 697 arp_xmit(skb);
698} 698}
699 699
700static void parp_redo(struct sk_buff *skb)
701{
702 nf_reset(skb);
703 arp_rcv(skb, skb->dev, NULL, skb->dev);
704}
705
706/* 700/*
707 * Process an arp request. 701 * Process an arp request.
708 */ 702 */
@@ -922,6 +916,11 @@ out:
922 return 0; 916 return 0;
923} 917}
924 918
919static void parp_redo(struct sk_buff *skb)
920{
921 arp_process(skb);
922}
923
925 924
926/* 925/*
927 * Receive an arp request from the device layer. 926 * Receive an arp request from the device layer.
@@ -990,8 +989,8 @@ static int arp_req_set(struct arpreq *r, struct net_device * dev)
990 ipv4_devconf.proxy_arp = 1; 989 ipv4_devconf.proxy_arp = 1;
991 return 0; 990 return 0;
992 } 991 }
993 if (__in_dev_get(dev)) { 992 if (__in_dev_get_rtnl(dev)) {
994 __in_dev_get(dev)->cnf.proxy_arp = 1; 993 __in_dev_get_rtnl(dev)->cnf.proxy_arp = 1;
995 return 0; 994 return 0;
996 } 995 }
997 return -ENXIO; 996 return -ENXIO;
@@ -1096,8 +1095,8 @@ static int arp_req_delete(struct arpreq *r, struct net_device * dev)
1096 ipv4_devconf.proxy_arp = 0; 1095 ipv4_devconf.proxy_arp = 0;
1097 return 0; 1096 return 0;
1098 } 1097 }
1099 if (__in_dev_get(dev)) { 1098 if (__in_dev_get_rtnl(dev)) {
1100 __in_dev_get(dev)->cnf.proxy_arp = 0; 1099 __in_dev_get_rtnl(dev)->cnf.proxy_arp = 0;
1101 return 0; 1100 return 0;
1102 } 1101 }
1103 return -ENXIO; 1102 return -ENXIO;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index ba2895ae8151..4ec4b2ca6ab1 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -351,7 +351,7 @@ static int inet_insert_ifa(struct in_ifaddr *ifa)
351 351
352static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) 352static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
353{ 353{
354 struct in_device *in_dev = __in_dev_get(dev); 354 struct in_device *in_dev = __in_dev_get_rtnl(dev);
355 355
356 ASSERT_RTNL(); 356 ASSERT_RTNL();
357 357
@@ -449,7 +449,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
449 goto out; 449 goto out;
450 450
451 rc = -ENOBUFS; 451 rc = -ENOBUFS;
452 if ((in_dev = __in_dev_get(dev)) == NULL) { 452 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
453 in_dev = inetdev_init(dev); 453 in_dev = inetdev_init(dev);
454 if (!in_dev) 454 if (!in_dev)
455 goto out; 455 goto out;
@@ -584,7 +584,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
584 if (colon) 584 if (colon)
585 *colon = ':'; 585 *colon = ':';
586 586
587 if ((in_dev = __in_dev_get(dev)) != NULL) { 587 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
588 if (tryaddrmatch) { 588 if (tryaddrmatch) {
589 /* Matthias Andree */ 589 /* Matthias Andree */
590 /* compare label and address (4.4BSD style) */ 590 /* compare label and address (4.4BSD style) */
@@ -715,6 +715,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
715 break; 715 break;
716 ret = 0; 716 ret = 0;
717 if (ifa->ifa_mask != sin->sin_addr.s_addr) { 717 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
718 u32 old_mask = ifa->ifa_mask;
718 inet_del_ifa(in_dev, ifap, 0); 719 inet_del_ifa(in_dev, ifap, 0);
719 ifa->ifa_mask = sin->sin_addr.s_addr; 720 ifa->ifa_mask = sin->sin_addr.s_addr;
720 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask); 721 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
@@ -728,7 +729,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
728 if ((dev->flags & IFF_BROADCAST) && 729 if ((dev->flags & IFF_BROADCAST) &&
729 (ifa->ifa_prefixlen < 31) && 730 (ifa->ifa_prefixlen < 31) &&
730 (ifa->ifa_broadcast == 731 (ifa->ifa_broadcast ==
731 (ifa->ifa_local|~ifa->ifa_mask))) { 732 (ifa->ifa_local|~old_mask))) {
732 ifa->ifa_broadcast = (ifa->ifa_local | 733 ifa->ifa_broadcast = (ifa->ifa_local |
733 ~sin->sin_addr.s_addr); 734 ~sin->sin_addr.s_addr);
734 } 735 }
@@ -748,7 +749,7 @@ rarok:
748 749
749static int inet_gifconf(struct net_device *dev, char __user *buf, int len) 750static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
750{ 751{
751 struct in_device *in_dev = __in_dev_get(dev); 752 struct in_device *in_dev = __in_dev_get_rtnl(dev);
752 struct in_ifaddr *ifa; 753 struct in_ifaddr *ifa;
753 struct ifreq ifr; 754 struct ifreq ifr;
754 int done = 0; 755 int done = 0;
@@ -791,7 +792,7 @@ u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope)
791 struct in_device *in_dev; 792 struct in_device *in_dev;
792 793
793 rcu_read_lock(); 794 rcu_read_lock();
794 in_dev = __in_dev_get(dev); 795 in_dev = __in_dev_get_rcu(dev);
795 if (!in_dev) 796 if (!in_dev)
796 goto no_in_dev; 797 goto no_in_dev;
797 798
@@ -818,7 +819,7 @@ no_in_dev:
818 read_lock(&dev_base_lock); 819 read_lock(&dev_base_lock);
819 rcu_read_lock(); 820 rcu_read_lock();
820 for (dev = dev_base; dev; dev = dev->next) { 821 for (dev = dev_base; dev; dev = dev->next) {
821 if ((in_dev = __in_dev_get(dev)) == NULL) 822 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
822 continue; 823 continue;
823 824
824 for_primary_ifa(in_dev) { 825 for_primary_ifa(in_dev) {
@@ -887,7 +888,7 @@ u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scop
887 888
888 if (dev) { 889 if (dev) {
889 rcu_read_lock(); 890 rcu_read_lock();
890 if ((in_dev = __in_dev_get(dev))) 891 if ((in_dev = __in_dev_get_rcu(dev)))
891 addr = confirm_addr_indev(in_dev, dst, local, scope); 892 addr = confirm_addr_indev(in_dev, dst, local, scope);
892 rcu_read_unlock(); 893 rcu_read_unlock();
893 894
@@ -897,7 +898,7 @@ u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scop
897 read_lock(&dev_base_lock); 898 read_lock(&dev_base_lock);
898 rcu_read_lock(); 899 rcu_read_lock();
899 for (dev = dev_base; dev; dev = dev->next) { 900 for (dev = dev_base; dev; dev = dev->next) {
900 if ((in_dev = __in_dev_get(dev))) { 901 if ((in_dev = __in_dev_get_rcu(dev))) {
901 addr = confirm_addr_indev(in_dev, dst, local, scope); 902 addr = confirm_addr_indev(in_dev, dst, local, scope);
902 if (addr) 903 if (addr)
903 break; 904 break;
@@ -957,7 +958,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
957 void *ptr) 958 void *ptr)
958{ 959{
959 struct net_device *dev = ptr; 960 struct net_device *dev = ptr;
960 struct in_device *in_dev = __in_dev_get(dev); 961 struct in_device *in_dev = __in_dev_get_rtnl(dev);
961 962
962 ASSERT_RTNL(); 963 ASSERT_RTNL();
963 964
@@ -1078,7 +1079,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1078 if (idx > s_idx) 1079 if (idx > s_idx)
1079 s_ip_idx = 0; 1080 s_ip_idx = 0;
1080 rcu_read_lock(); 1081 rcu_read_lock();
1081 if ((in_dev = __in_dev_get(dev)) == NULL) { 1082 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
1082 rcu_read_unlock(); 1083 rcu_read_unlock();
1083 continue; 1084 continue;
1084 } 1085 }
@@ -1149,7 +1150,7 @@ void inet_forward_change(void)
1149 for (dev = dev_base; dev; dev = dev->next) { 1150 for (dev = dev_base; dev; dev = dev->next) {
1150 struct in_device *in_dev; 1151 struct in_device *in_dev;
1151 rcu_read_lock(); 1152 rcu_read_lock();
1152 in_dev = __in_dev_get(dev); 1153 in_dev = __in_dev_get_rcu(dev);
1153 if (in_dev) 1154 if (in_dev)
1154 in_dev->cnf.forwarding = on; 1155 in_dev->cnf.forwarding = on;
1155 rcu_read_unlock(); 1156 rcu_read_unlock();
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 1b5a09d1b90b..1b18ce66e7b7 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -5,6 +5,7 @@
5#include <net/esp.h> 5#include <net/esp.h>
6#include <asm/scatterlist.h> 6#include <asm/scatterlist.h>
7#include <linux/crypto.h> 7#include <linux/crypto.h>
8#include <linux/kernel.h>
8#include <linux/pfkeyv2.h> 9#include <linux/pfkeyv2.h>
9#include <linux/random.h> 10#include <linux/random.h>
10#include <net/icmp.h> 11#include <net/icmp.h>
@@ -42,10 +43,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
42 esp = x->data; 43 esp = x->data;
43 alen = esp->auth.icv_trunc_len; 44 alen = esp->auth.icv_trunc_len;
44 tfm = esp->conf.tfm; 45 tfm = esp->conf.tfm;
45 blksize = (crypto_tfm_alg_blocksize(tfm) + 3) & ~3; 46 blksize = ALIGN(crypto_tfm_alg_blocksize(tfm), 4);
46 clen = (clen + 2 + blksize-1)&~(blksize-1); 47 clen = ALIGN(clen + 2, blksize);
47 if (esp->conf.padlen) 48 if (esp->conf.padlen)
48 clen = (clen + esp->conf.padlen-1)&~(esp->conf.padlen-1); 49 clen = ALIGN(clen, esp->conf.padlen);
49 50
50 if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0) 51 if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0)
51 goto error; 52 goto error;
@@ -143,7 +144,7 @@ static int esp_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struc
143 struct ip_esp_hdr *esph; 144 struct ip_esp_hdr *esph;
144 struct esp_data *esp = x->data; 145 struct esp_data *esp = x->data;
145 struct sk_buff *trailer; 146 struct sk_buff *trailer;
146 int blksize = crypto_tfm_alg_blocksize(esp->conf.tfm); 147 int blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
147 int alen = esp->auth.icv_trunc_len; 148 int alen = esp->auth.icv_trunc_len;
148 int elen = skb->len - sizeof(struct ip_esp_hdr) - esp->conf.ivlen - alen; 149 int elen = skb->len - sizeof(struct ip_esp_hdr) - esp->conf.ivlen - alen;
149 int nfrags; 150 int nfrags;
@@ -304,16 +305,16 @@ static int esp_post_input(struct xfrm_state *x, struct xfrm_decap_state *decap,
304static u32 esp4_get_max_size(struct xfrm_state *x, int mtu) 305static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
305{ 306{
306 struct esp_data *esp = x->data; 307 struct esp_data *esp = x->data;
307 u32 blksize = crypto_tfm_alg_blocksize(esp->conf.tfm); 308 u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
308 309
309 if (x->props.mode) { 310 if (x->props.mode) {
310 mtu = (mtu + 2 + blksize-1)&~(blksize-1); 311 mtu = ALIGN(mtu + 2, blksize);
311 } else { 312 } else {
312 /* The worst case. */ 313 /* The worst case. */
313 mtu += 2 + blksize; 314 mtu = ALIGN(mtu + 2, 4) + blksize - 4;
314 } 315 }
315 if (esp->conf.padlen) 316 if (esp->conf.padlen)
316 mtu = (mtu + esp->conf.padlen-1)&~(esp->conf.padlen-1); 317 mtu = ALIGN(mtu, esp->conf.padlen);
317 318
318 return mtu + x->props.header_len + esp->auth.icv_trunc_len; 319 return mtu + x->props.header_len + esp->auth.icv_trunc_len;
319} 320}
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 4e1379f71269..e61bc7177eb1 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -173,7 +173,7 @@ int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
173 173
174 no_addr = rpf = 0; 174 no_addr = rpf = 0;
175 rcu_read_lock(); 175 rcu_read_lock();
176 in_dev = __in_dev_get(dev); 176 in_dev = __in_dev_get_rcu(dev);
177 if (in_dev) { 177 if (in_dev) {
178 no_addr = in_dev->ifa_list == NULL; 178 no_addr = in_dev->ifa_list == NULL;
179 rpf = IN_DEV_RPFILTER(in_dev); 179 rpf = IN_DEV_RPFILTER(in_dev);
@@ -607,7 +607,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
607static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) 607static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
608{ 608{
609 struct net_device *dev = ptr; 609 struct net_device *dev = ptr;
610 struct in_device *in_dev = __in_dev_get(dev); 610 struct in_device *in_dev = __in_dev_get_rtnl(dev);
611 611
612 if (event == NETDEV_UNREGISTER) { 612 if (event == NETDEV_UNREGISTER) {
613 fib_disable_ip(dev, 2); 613 fib_disable_ip(dev, 2);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index d41219e8037c..186f20c4a45e 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1087,7 +1087,7 @@ fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
1087 rta->rta_oif = &dev->ifindex; 1087 rta->rta_oif = &dev->ifindex;
1088 if (colon) { 1088 if (colon) {
1089 struct in_ifaddr *ifa; 1089 struct in_ifaddr *ifa;
1090 struct in_device *in_dev = __in_dev_get(dev); 1090 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1091 if (!in_dev) 1091 if (!in_dev)
1092 return -ENODEV; 1092 return -ENODEV;
1093 *colon = ':'; 1093 *colon = ':';
@@ -1268,7 +1268,7 @@ int fib_sync_up(struct net_device *dev)
1268 } 1268 }
1269 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP)) 1269 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1270 continue; 1270 continue;
1271 if (nh->nh_dev != dev || __in_dev_get(dev) == NULL) 1271 if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
1272 continue; 1272 continue;
1273 alive++; 1273 alive++;
1274 spin_lock_bh(&fib_multipath_lock); 1274 spin_lock_bh(&fib_multipath_lock);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 50c0519cd70d..66247f38b371 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -286,6 +286,8 @@ static inline void check_tnode(const struct tnode *tn)
286 286
287static int halve_threshold = 25; 287static int halve_threshold = 25;
288static int inflate_threshold = 50; 288static int inflate_threshold = 50;
289static int halve_threshold_root = 15;
290static int inflate_threshold_root = 25;
289 291
290 292
291static void __alias_free_mem(struct rcu_head *head) 293static void __alias_free_mem(struct rcu_head *head)
@@ -449,6 +451,8 @@ static struct node *resize(struct trie *t, struct tnode *tn)
449 int i; 451 int i;
450 int err = 0; 452 int err = 0;
451 struct tnode *old_tn; 453 struct tnode *old_tn;
454 int inflate_threshold_use;
455 int halve_threshold_use;
452 456
453 if (!tn) 457 if (!tn)
454 return NULL; 458 return NULL;
@@ -541,10 +545,17 @@ static struct node *resize(struct trie *t, struct tnode *tn)
541 545
542 check_tnode(tn); 546 check_tnode(tn);
543 547
548 /* Keep root node larger */
549
550 if(!tn->parent)
551 inflate_threshold_use = inflate_threshold_root;
552 else
553 inflate_threshold_use = inflate_threshold;
554
544 err = 0; 555 err = 0;
545 while ((tn->full_children > 0 && 556 while ((tn->full_children > 0 &&
546 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >= 557 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
547 inflate_threshold * tnode_child_length(tn))) { 558 inflate_threshold_use * tnode_child_length(tn))) {
548 559
549 old_tn = tn; 560 old_tn = tn;
550 tn = inflate(t, tn); 561 tn = inflate(t, tn);
@@ -564,10 +575,18 @@ static struct node *resize(struct trie *t, struct tnode *tn)
564 * node is above threshold. 575 * node is above threshold.
565 */ 576 */
566 577
578
579 /* Keep root node larger */
580
581 if(!tn->parent)
582 halve_threshold_use = halve_threshold_root;
583 else
584 halve_threshold_use = halve_threshold;
585
567 err = 0; 586 err = 0;
568 while (tn->bits > 1 && 587 while (tn->bits > 1 &&
569 100 * (tnode_child_length(tn) - tn->empty_children) < 588 100 * (tnode_child_length(tn) - tn->empty_children) <
570 halve_threshold * tnode_child_length(tn)) { 589 halve_threshold_use * tnode_child_length(tn)) {
571 590
572 old_tn = tn; 591 old_tn = tn;
573 tn = halve(t, tn); 592 tn = halve(t, tn);
@@ -2385,7 +2404,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
2385 prefix = htonl(l->key); 2404 prefix = htonl(l->key);
2386 2405
2387 list_for_each_entry_rcu(fa, &li->falh, fa_list) { 2406 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
2388 const struct fib_info *fi = rcu_dereference(fa->fa_info); 2407 const struct fib_info *fi = fa->fa_info;
2389 unsigned flags = fib_flag_trans(fa->fa_type, mask, fi); 2408 unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
2390 2409
2391 if (fa->fa_type == RTN_BROADCAST 2410 if (fa->fa_type == RTN_BROADCAST
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 24eb56ae1b5a..175e093ec564 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -188,7 +188,7 @@ struct icmp_err icmp_err_convert[] = {
188 188
189/* Control parameters for ECHO replies. */ 189/* Control parameters for ECHO replies. */
190int sysctl_icmp_echo_ignore_all; 190int sysctl_icmp_echo_ignore_all;
191int sysctl_icmp_echo_ignore_broadcasts; 191int sysctl_icmp_echo_ignore_broadcasts = 1;
192 192
193/* Control parameter - ignore bogus broadcast responses? */ 193/* Control parameter - ignore bogus broadcast responses? */
194int sysctl_icmp_ignore_bogus_error_responses; 194int sysctl_icmp_ignore_bogus_error_responses;
@@ -1108,12 +1108,9 @@ void __init icmp_init(struct net_proto_family *ops)
1108 struct inet_sock *inet; 1108 struct inet_sock *inet;
1109 int i; 1109 int i;
1110 1110
1111 for (i = 0; i < NR_CPUS; i++) { 1111 for_each_cpu(i) {
1112 int err; 1112 int err;
1113 1113
1114 if (!cpu_possible(i))
1115 continue;
1116
1117 err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, 1114 err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP,
1118 &per_cpu(__icmp_socket, i)); 1115 &per_cpu(__icmp_socket, i));
1119 1116
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 70c44e4c3ceb..8b6d3939e1e6 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1323,7 +1323,7 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr)
1323 } 1323 }
1324 if (dev) { 1324 if (dev) {
1325 imr->imr_ifindex = dev->ifindex; 1325 imr->imr_ifindex = dev->ifindex;
1326 idev = __in_dev_get(dev); 1326 idev = __in_dev_get_rtnl(dev);
1327 } 1327 }
1328 return idev; 1328 return idev;
1329} 1329}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index fe3c6d3d0c91..94468a76c5b4 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -494,7 +494,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
494EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune); 494EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune);
495 495
496struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, 496struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
497 const unsigned int __nocast priority) 497 const gfp_t priority)
498{ 498{
499 struct sock *newsk = sk_clone(sk, priority); 499 struct sock *newsk = sk_clone(sk, priority);
500 500
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 4d1502a49852..a010e9a68811 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -20,7 +20,7 @@ void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashi
20 struct inet_bind_hashbucket *bhead; 20 struct inet_bind_hashbucket *bhead;
21 struct inet_bind_bucket *tb; 21 struct inet_bind_bucket *tb;
22 /* Unlink from established hashes. */ 22 /* Unlink from established hashes. */
23 struct inet_ehash_bucket *ehead = &hashinfo->ehash[tw->tw_hashent]; 23 struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, tw->tw_hash);
24 24
25 write_lock(&ehead->lock); 25 write_lock(&ehead->lock);
26 if (hlist_unhashed(&tw->tw_node)) { 26 if (hlist_unhashed(&tw->tw_node)) {
@@ -60,7 +60,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
60{ 60{
61 const struct inet_sock *inet = inet_sk(sk); 61 const struct inet_sock *inet = inet_sk(sk);
62 const struct inet_connection_sock *icsk = inet_csk(sk); 62 const struct inet_connection_sock *icsk = inet_csk(sk);
63 struct inet_ehash_bucket *ehead = &hashinfo->ehash[sk->sk_hashent]; 63 struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash);
64 struct inet_bind_hashbucket *bhead; 64 struct inet_bind_hashbucket *bhead;
65 /* Step 1: Put TW into bind hash. Original socket stays there too. 65 /* Step 1: Put TW into bind hash. Original socket stays there too.
66 Note, that any socket with inet->num != 0 MUST be bound in 66 Note, that any socket with inet->num != 0 MUST be bound in
@@ -106,11 +106,12 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
106 tw->tw_dport = inet->dport; 106 tw->tw_dport = inet->dport;
107 tw->tw_family = sk->sk_family; 107 tw->tw_family = sk->sk_family;
108 tw->tw_reuse = sk->sk_reuse; 108 tw->tw_reuse = sk->sk_reuse;
109 tw->tw_hashent = sk->sk_hashent; 109 tw->tw_hash = sk->sk_hash;
110 tw->tw_ipv6only = 0; 110 tw->tw_ipv6only = 0;
111 tw->tw_prot = sk->sk_prot_creator; 111 tw->tw_prot = sk->sk_prot_creator;
112 atomic_set(&tw->tw_refcnt, 1); 112 atomic_set(&tw->tw_refcnt, 1);
113 inet_twsk_dead_node_init(tw); 113 inet_twsk_dead_node_init(tw);
114 __module_get(tw->tw_prot->owner);
114 } 115 }
115 116
116 return tw; 117 return tw;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index f0d5740d7e22..896ce3f8f53a 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1104,10 +1104,10 @@ static int ipgre_open(struct net_device *dev)
1104 return -EADDRNOTAVAIL; 1104 return -EADDRNOTAVAIL;
1105 dev = rt->u.dst.dev; 1105 dev = rt->u.dst.dev;
1106 ip_rt_put(rt); 1106 ip_rt_put(rt);
1107 if (__in_dev_get(dev) == NULL) 1107 if (__in_dev_get_rtnl(dev) == NULL)
1108 return -EADDRNOTAVAIL; 1108 return -EADDRNOTAVAIL;
1109 t->mlink = dev->ifindex; 1109 t->mlink = dev->ifindex;
1110 ip_mc_inc_group(__in_dev_get(dev), t->parms.iph.daddr); 1110 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1111 } 1111 }
1112 return 0; 1112 return 0;
1113} 1113}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 3f1a263e1249..87e350069abb 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -391,6 +391,9 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
391 to->nfct = from->nfct; 391 to->nfct = from->nfct;
392 nf_conntrack_get(to->nfct); 392 nf_conntrack_get(to->nfct);
393 to->nfctinfo = from->nfctinfo; 393 to->nfctinfo = from->nfctinfo;
394#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
395 to->ipvs_property = from->ipvs_property;
396#endif
394#ifdef CONFIG_BRIDGE_NETFILTER 397#ifdef CONFIG_BRIDGE_NETFILTER
395 nf_bridge_put(to->nf_bridge); 398 nf_bridge_put(to->nf_bridge);
396 to->nf_bridge = from->nf_bridge; 399 to->nf_bridge = from->nf_bridge;
@@ -1020,10 +1023,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
1020 int alloclen; 1023 int alloclen;
1021 1024
1022 skb_prev = skb; 1025 skb_prev = skb;
1023 if (skb_prev) 1026 fraggap = skb_prev->len - maxfraglen;
1024 fraggap = skb_prev->len - maxfraglen;
1025 else
1026 fraggap = 0;
1027 1027
1028 alloclen = fragheaderlen + hh_len + fraggap + 15; 1028 alloclen = fragheaderlen + hh_len + fraggap + 15;
1029 skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation); 1029 skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9dbf5909f3a6..302b7eb507c9 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -149,7 +149,7 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v)
149 if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) { 149 if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) {
150 dev->flags |= IFF_MULTICAST; 150 dev->flags |= IFF_MULTICAST;
151 151
152 in_dev = __in_dev_get(dev); 152 in_dev = __in_dev_get_rtnl(dev);
153 if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL) 153 if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
154 goto failure; 154 goto failure;
155 in_dev->cnf.rp_filter = 0; 155 in_dev->cnf.rp_filter = 0;
@@ -278,7 +278,7 @@ static int vif_delete(int vifi)
278 278
279 dev_set_allmulti(dev, -1); 279 dev_set_allmulti(dev, -1);
280 280
281 if ((in_dev = __in_dev_get(dev)) != NULL) { 281 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
282 in_dev->cnf.mc_forwarding--; 282 in_dev->cnf.mc_forwarding--;
283 ip_rt_multicast_event(in_dev); 283 ip_rt_multicast_event(in_dev);
284 } 284 }
@@ -421,7 +421,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
421 return -EINVAL; 421 return -EINVAL;
422 } 422 }
423 423
424 if ((in_dev = __in_dev_get(dev)) == NULL) 424 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
425 return -EADDRNOTAVAIL; 425 return -EADDRNOTAVAIL;
426 in_dev->cnf.mc_forwarding++; 426 in_dev->cnf.mc_forwarding++;
427 dev_set_allmulti(dev, +1); 427 dev_set_allmulti(dev, +1);
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index 6e092dadb388..fc6f95aaa969 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -604,7 +604,7 @@ static struct file_operations ip_vs_app_fops = {
604/* 604/*
605 * Replace a segment of data with a new segment 605 * Replace a segment of data with a new segment
606 */ 606 */
607int ip_vs_skb_replace(struct sk_buff *skb, int pri, 607int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
608 char *o_buf, int o_len, char *n_buf, int n_len) 608 char *o_buf, int o_len, char *n_buf, int n_len)
609{ 609{
610 struct iphdr *iph; 610 struct iphdr *iph;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 3cf9b451675c..7d917e4ce1d9 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -139,9 +139,10 @@ config IP_NF_AMANDA
139 139
140config IP_NF_PPTP 140config IP_NF_PPTP
141 tristate 'PPTP protocol support' 141 tristate 'PPTP protocol support'
142 depends on IP_NF_CONNTRACK
142 help 143 help
143 This module adds support for PPTP (Point to Point Tunnelling 144 This module adds support for PPTP (Point to Point Tunnelling
144 Protocol, RFC2637) conncection tracking and NAT. 145 Protocol, RFC2637) connection tracking and NAT.
145 146
146 If you are running PPTP sessions over a stateful firewall or NAT 147 If you are running PPTP sessions over a stateful firewall or NAT
147 box, you may want to enable this feature. 148 box, you may want to enable this feature.
@@ -498,9 +499,14 @@ config IP_NF_TARGET_LOG
498 To compile it as a module, choose M here. If unsure, say N. 499 To compile it as a module, choose M here. If unsure, say N.
499 500
500config IP_NF_TARGET_ULOG 501config IP_NF_TARGET_ULOG
501 tristate "ULOG target support" 502 tristate "ULOG target support (OBSOLETE)"
502 depends on IP_NF_IPTABLES 503 depends on IP_NF_IPTABLES
503 ---help--- 504 ---help---
505
506 This option enables the old IPv4-only "ipt_ULOG" implementation
507 which has been obsoleted by the new "nfnetlink_log" code (see
508 CONFIG_NETFILTER_NETLINK_LOG).
509
504 This option adds a `ULOG' target, which allows you to create rules in 510 This option adds a `ULOG' target, which allows you to create rules in
505 any iptables table. The packet is passed to a userspace logging 511 any iptables table. The packet is passed to a userspace logging
506 daemon using netlink multicast sockets; unlike the LOG target 512 daemon using netlink multicast sockets; unlike the LOG target
@@ -537,6 +543,17 @@ config IP_NF_TARGET_TCPMSS
537 543
538 To compile it as a module, choose M here. If unsure, say N. 544 To compile it as a module, choose M here. If unsure, say N.
539 545
546config IP_NF_TARGET_NFQUEUE
547 tristate "NFQUEUE Target Support"
548 depends on IP_NF_IPTABLES
549 help
550 This Target replaced the old obsolete QUEUE target.
551
552 As opposed to QUEUE, it supports 65535 different queues,
553 not just one.
554
555 To compile it as a module, choose M here. If unsure, say N.
556
540# NAT + specific targets 557# NAT + specific targets
541config IP_NF_NAT 558config IP_NF_NAT
542 tristate "Full NAT" 559 tristate "Full NAT"
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 3d45d3c0283c..dab4b58dd31e 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -4,7 +4,8 @@
4 4
5# objects for the standalone - connection tracking / NAT 5# objects for the standalone - connection tracking / NAT
6ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o 6ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o
7iptable_nat-objs := ip_nat_standalone.o ip_nat_rule.o ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o 7ip_nat-objs := ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o
8iptable_nat-objs := ip_nat_rule.o ip_nat_standalone.o
8 9
9ip_conntrack_pptp-objs := ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o 10ip_conntrack_pptp-objs := ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o
10ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o 11ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o
@@ -40,7 +41,7 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
40# the three instances of ip_tables 41# the three instances of ip_tables
41obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o 42obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
42obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o 43obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
43obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o 44obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o ip_nat.o
44obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o 45obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
45 46
46# matches 47# matches
@@ -92,6 +93,7 @@ obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o
92obj-$(CONFIG_IP_NF_TARGET_NOTRACK) += ipt_NOTRACK.o 93obj-$(CONFIG_IP_NF_TARGET_NOTRACK) += ipt_NOTRACK.o
93obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o 94obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
94obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o 95obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o
96obj-$(CONFIG_IP_NF_TARGET_NFQUEUE) += ipt_NFQUEUE.o
95 97
96# generic ARP tables 98# generic ARP tables
97obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o 99obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o
@@ -101,4 +103,3 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o
101obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o 103obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o
102 104
103obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o 105obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o
104obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += ipt_NFQUEUE.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index fa1634256680..a7969286e6e7 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -716,8 +716,10 @@ static int translate_table(const char *name,
716 } 716 }
717 717
718 /* And one copy for every other CPU */ 718 /* And one copy for every other CPU */
719 for (i = 1; i < num_possible_cpus(); i++) { 719 for_each_cpu(i) {
720 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i, 720 if (i == 0)
721 continue;
722 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
721 newinfo->entries, 723 newinfo->entries,
722 SMP_ALIGN(newinfo->size)); 724 SMP_ALIGN(newinfo->size));
723 } 725 }
@@ -767,7 +769,7 @@ static void get_counters(const struct arpt_table_info *t,
767 unsigned int cpu; 769 unsigned int cpu;
768 unsigned int i; 770 unsigned int i;
769 771
770 for (cpu = 0; cpu < num_possible_cpus(); cpu++) { 772 for_each_cpu(cpu) {
771 i = 0; 773 i = 0;
772 ARPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), 774 ARPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
773 t->size, 775 t->size,
@@ -885,7 +887,8 @@ static int do_replace(void __user *user, unsigned int len)
885 return -ENOMEM; 887 return -ENOMEM;
886 888
887 newinfo = vmalloc(sizeof(struct arpt_table_info) 889 newinfo = vmalloc(sizeof(struct arpt_table_info)
888 + SMP_ALIGN(tmp.size) * num_possible_cpus()); 890 + SMP_ALIGN(tmp.size) *
891 (highest_possible_processor_id()+1));
889 if (!newinfo) 892 if (!newinfo)
890 return -ENOMEM; 893 return -ENOMEM;
891 894
@@ -1158,7 +1161,8 @@ int arpt_register_table(struct arpt_table *table,
1158 = { 0, 0, 0, { 0 }, { 0 }, { } }; 1161 = { 0, 0, 0, { 0 }, { 0 }, { } };
1159 1162
1160 newinfo = vmalloc(sizeof(struct arpt_table_info) 1163 newinfo = vmalloc(sizeof(struct arpt_table_info)
1161 + SMP_ALIGN(repl->size) * num_possible_cpus()); 1164 + SMP_ALIGN(repl->size) *
1165 (highest_possible_processor_id()+1));
1162 if (!newinfo) { 1166 if (!newinfo) {
1163 ret = -ENOMEM; 1167 ret = -ENOMEM;
1164 return ret; 1168 return ret;
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c
index dc20881004bc..fa3f914117ec 100644
--- a/net/ipv4/netfilter/ip_conntrack_amanda.c
+++ b/net/ipv4/netfilter/ip_conntrack_amanda.c
@@ -65,7 +65,7 @@ static int help(struct sk_buff **pskb,
65 65
66 /* increase the UDP timeout of the master connection as replies from 66 /* increase the UDP timeout of the master connection as replies from
67 * Amanda clients to the server can be quite delayed */ 67 * Amanda clients to the server can be quite delayed */
68 ip_ct_refresh_acct(ct, ctinfo, NULL, master_timeout * HZ); 68 ip_ct_refresh(ct, *pskb, master_timeout * HZ);
69 69
70 /* No data? */ 70 /* No data? */
71 dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); 71 dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index c1f82e0c81cf..422ab68ee7fb 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -50,7 +50,7 @@
50#include <linux/netfilter_ipv4/ip_conntrack_core.h> 50#include <linux/netfilter_ipv4/ip_conntrack_core.h>
51#include <linux/netfilter_ipv4/listhelp.h> 51#include <linux/netfilter_ipv4/listhelp.h>
52 52
53#define IP_CONNTRACK_VERSION "2.3" 53#define IP_CONNTRACK_VERSION "2.4"
54 54
55#if 0 55#if 0
56#define DEBUGP printk 56#define DEBUGP printk
@@ -148,16 +148,20 @@ DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
148static int ip_conntrack_hash_rnd_initted; 148static int ip_conntrack_hash_rnd_initted;
149static unsigned int ip_conntrack_hash_rnd; 149static unsigned int ip_conntrack_hash_rnd;
150 150
151static u_int32_t 151static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple,
152hash_conntrack(const struct ip_conntrack_tuple *tuple) 152 unsigned int size, unsigned int rnd)
153{ 153{
154#if 0
155 dump_tuple(tuple);
156#endif
157 return (jhash_3words(tuple->src.ip, 154 return (jhash_3words(tuple->src.ip,
158 (tuple->dst.ip ^ tuple->dst.protonum), 155 (tuple->dst.ip ^ tuple->dst.protonum),
159 (tuple->src.u.all | (tuple->dst.u.all << 16)), 156 (tuple->src.u.all | (tuple->dst.u.all << 16)),
160 ip_conntrack_hash_rnd) % ip_conntrack_htable_size); 157 rnd) % size);
158}
159
160static u_int32_t
161hash_conntrack(const struct ip_conntrack_tuple *tuple)
162{
163 return __hash_conntrack(tuple, ip_conntrack_htable_size,
164 ip_conntrack_hash_rnd);
161} 165}
162 166
163int 167int
@@ -1112,45 +1116,49 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
1112 synchronize_net(); 1116 synchronize_net();
1113} 1117}
1114 1118
1115static inline void ct_add_counters(struct ip_conntrack *ct, 1119/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
1116 enum ip_conntrack_info ctinfo, 1120void __ip_ct_refresh_acct(struct ip_conntrack *ct,
1117 const struct sk_buff *skb)
1118{
1119#ifdef CONFIG_IP_NF_CT_ACCT
1120 if (skb) {
1121 ct->counters[CTINFO2DIR(ctinfo)].packets++;
1122 ct->counters[CTINFO2DIR(ctinfo)].bytes +=
1123 ntohs(skb->nh.iph->tot_len);
1124 }
1125#endif
1126}
1127
1128/* Refresh conntrack for this many jiffies and do accounting (if skb != NULL) */
1129void ip_ct_refresh_acct(struct ip_conntrack *ct,
1130 enum ip_conntrack_info ctinfo, 1121 enum ip_conntrack_info ctinfo,
1131 const struct sk_buff *skb, 1122 const struct sk_buff *skb,
1132 unsigned long extra_jiffies) 1123 unsigned long extra_jiffies,
1124 int do_acct)
1133{ 1125{
1126 int event = 0;
1127
1134 IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct); 1128 IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
1129 IP_NF_ASSERT(skb);
1130
1131 write_lock_bh(&ip_conntrack_lock);
1135 1132
1136 /* If not in hash table, timer will not be active yet */ 1133 /* If not in hash table, timer will not be active yet */
1137 if (!is_confirmed(ct)) { 1134 if (!is_confirmed(ct)) {
1138 ct->timeout.expires = extra_jiffies; 1135 ct->timeout.expires = extra_jiffies;
1139 ct_add_counters(ct, ctinfo, skb); 1136 event = IPCT_REFRESH;
1140 } else { 1137 } else {
1141 write_lock_bh(&ip_conntrack_lock);
1142 /* Need del_timer for race avoidance (may already be dying). */ 1138 /* Need del_timer for race avoidance (may already be dying). */
1143 if (del_timer(&ct->timeout)) { 1139 if (del_timer(&ct->timeout)) {
1144 ct->timeout.expires = jiffies + extra_jiffies; 1140 ct->timeout.expires = jiffies + extra_jiffies;
1145 add_timer(&ct->timeout); 1141 add_timer(&ct->timeout);
1146 /* FIXME: We loose some REFRESH events if this function 1142 event = IPCT_REFRESH;
1147 * is called without an skb. I'll fix this later -HW */
1148 if (skb)
1149 ip_conntrack_event_cache(IPCT_REFRESH, skb);
1150 } 1143 }
1151 ct_add_counters(ct, ctinfo, skb);
1152 write_unlock_bh(&ip_conntrack_lock);
1153 } 1144 }
1145
1146#ifdef CONFIG_IP_NF_CT_ACCT
1147 if (do_acct) {
1148 ct->counters[CTINFO2DIR(ctinfo)].packets++;
1149 ct->counters[CTINFO2DIR(ctinfo)].bytes +=
1150 ntohs(skb->nh.iph->tot_len);
1151 if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
1152 || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
1153 event |= IPCT_COUNTER_FILLING;
1154 }
1155#endif
1156
1157 write_unlock_bh(&ip_conntrack_lock);
1158
1159 /* must be unlocked when calling event cache */
1160 if (event)
1161 ip_conntrack_event_cache(event, skb);
1154} 1162}
1155 1163
1156#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ 1164#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
@@ -1337,14 +1345,13 @@ static int kill_all(struct ip_conntrack *i, void *data)
1337 return 1; 1345 return 1;
1338} 1346}
1339 1347
1340static void free_conntrack_hash(void) 1348static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size)
1341{ 1349{
1342 if (ip_conntrack_vmalloc) 1350 if (vmalloced)
1343 vfree(ip_conntrack_hash); 1351 vfree(hash);
1344 else 1352 else
1345 free_pages((unsigned long)ip_conntrack_hash, 1353 free_pages((unsigned long)hash,
1346 get_order(sizeof(struct list_head) 1354 get_order(sizeof(struct list_head) * size));
1347 * ip_conntrack_htable_size));
1348} 1355}
1349 1356
1350void ip_conntrack_flush() 1357void ip_conntrack_flush()
@@ -1374,12 +1381,83 @@ void ip_conntrack_cleanup(void)
1374 ip_conntrack_flush(); 1381 ip_conntrack_flush();
1375 kmem_cache_destroy(ip_conntrack_cachep); 1382 kmem_cache_destroy(ip_conntrack_cachep);
1376 kmem_cache_destroy(ip_conntrack_expect_cachep); 1383 kmem_cache_destroy(ip_conntrack_expect_cachep);
1377 free_conntrack_hash(); 1384 free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
1385 ip_conntrack_htable_size);
1378 nf_unregister_sockopt(&so_getorigdst); 1386 nf_unregister_sockopt(&so_getorigdst);
1379} 1387}
1380 1388
1381static int hashsize; 1389static struct list_head *alloc_hashtable(int size, int *vmalloced)
1382module_param(hashsize, int, 0400); 1390{
1391 struct list_head *hash;
1392 unsigned int i;
1393
1394 *vmalloced = 0;
1395 hash = (void*)__get_free_pages(GFP_KERNEL,
1396 get_order(sizeof(struct list_head)
1397 * size));
1398 if (!hash) {
1399 *vmalloced = 1;
1400 printk(KERN_WARNING"ip_conntrack: falling back to vmalloc.\n");
1401 hash = vmalloc(sizeof(struct list_head) * size);
1402 }
1403
1404 if (hash)
1405 for (i = 0; i < size; i++)
1406 INIT_LIST_HEAD(&hash[i]);
1407
1408 return hash;
1409}
1410
1411int set_hashsize(const char *val, struct kernel_param *kp)
1412{
1413 int i, bucket, hashsize, vmalloced;
1414 int old_vmalloced, old_size;
1415 int rnd;
1416 struct list_head *hash, *old_hash;
1417 struct ip_conntrack_tuple_hash *h;
1418
1419 /* On boot, we can set this without any fancy locking. */
1420 if (!ip_conntrack_htable_size)
1421 return param_set_int(val, kp);
1422
1423 hashsize = simple_strtol(val, NULL, 0);
1424 if (!hashsize)
1425 return -EINVAL;
1426
1427 hash = alloc_hashtable(hashsize, &vmalloced);
1428 if (!hash)
1429 return -ENOMEM;
1430
1431 /* We have to rehash for the new table anyway, so we also can
1432 * use a new random seed */
1433 get_random_bytes(&rnd, 4);
1434
1435 write_lock_bh(&ip_conntrack_lock);
1436 for (i = 0; i < ip_conntrack_htable_size; i++) {
1437 while (!list_empty(&ip_conntrack_hash[i])) {
1438 h = list_entry(ip_conntrack_hash[i].next,
1439 struct ip_conntrack_tuple_hash, list);
1440 list_del(&h->list);
1441 bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
1442 list_add_tail(&h->list, &hash[bucket]);
1443 }
1444 }
1445 old_size = ip_conntrack_htable_size;
1446 old_vmalloced = ip_conntrack_vmalloc;
1447 old_hash = ip_conntrack_hash;
1448
1449 ip_conntrack_htable_size = hashsize;
1450 ip_conntrack_vmalloc = vmalloced;
1451 ip_conntrack_hash = hash;
1452 ip_conntrack_hash_rnd = rnd;
1453 write_unlock_bh(&ip_conntrack_lock);
1454
1455 free_conntrack_hash(old_hash, old_vmalloced, old_size);
1456 return 0;
1457}
1458
1459module_param_call(hashsize, set_hashsize, param_get_uint,
1460 &ip_conntrack_htable_size, 0600);
1383 1461
1384int __init ip_conntrack_init(void) 1462int __init ip_conntrack_init(void)
1385{ 1463{
@@ -1388,9 +1466,7 @@ int __init ip_conntrack_init(void)
1388 1466
1389 /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB 1467 /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
1390 * machine has 256 buckets. >= 1GB machines have 8192 buckets. */ 1468 * machine has 256 buckets. >= 1GB machines have 8192 buckets. */
1391 if (hashsize) { 1469 if (!ip_conntrack_htable_size) {
1392 ip_conntrack_htable_size = hashsize;
1393 } else {
1394 ip_conntrack_htable_size 1470 ip_conntrack_htable_size
1395 = (((num_physpages << PAGE_SHIFT) / 16384) 1471 = (((num_physpages << PAGE_SHIFT) / 16384)
1396 / sizeof(struct list_head)); 1472 / sizeof(struct list_head));
@@ -1412,20 +1488,8 @@ int __init ip_conntrack_init(void)
1412 return ret; 1488 return ret;
1413 } 1489 }
1414 1490
1415 /* AK: the hash table is twice as big than needed because it 1491 ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size,
1416 uses list_head. it would be much nicer to caches to use a 1492 &ip_conntrack_vmalloc);
1417 single pointer list head here. */
1418 ip_conntrack_vmalloc = 0;
1419 ip_conntrack_hash
1420 =(void*)__get_free_pages(GFP_KERNEL,
1421 get_order(sizeof(struct list_head)
1422 *ip_conntrack_htable_size));
1423 if (!ip_conntrack_hash) {
1424 ip_conntrack_vmalloc = 1;
1425 printk(KERN_WARNING "ip_conntrack: falling back to vmalloc.\n");
1426 ip_conntrack_hash = vmalloc(sizeof(struct list_head)
1427 * ip_conntrack_htable_size);
1428 }
1429 if (!ip_conntrack_hash) { 1493 if (!ip_conntrack_hash) {
1430 printk(KERN_ERR "Unable to create ip_conntrack_hash\n"); 1494 printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
1431 goto err_unreg_sockopt; 1495 goto err_unreg_sockopt;
@@ -1457,9 +1521,6 @@ int __init ip_conntrack_init(void)
1457 ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp; 1521 ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
1458 write_unlock_bh(&ip_conntrack_lock); 1522 write_unlock_bh(&ip_conntrack_lock);
1459 1523
1460 for (i = 0; i < ip_conntrack_htable_size; i++)
1461 INIT_LIST_HEAD(&ip_conntrack_hash[i]);
1462
1463 /* For use by ipt_REJECT */ 1524 /* For use by ipt_REJECT */
1464 ip_ct_attach = ip_conntrack_attach; 1525 ip_ct_attach = ip_conntrack_attach;
1465 1526
@@ -1474,7 +1535,8 @@ int __init ip_conntrack_init(void)
1474err_free_conntrack_slab: 1535err_free_conntrack_slab:
1475 kmem_cache_destroy(ip_conntrack_cachep); 1536 kmem_cache_destroy(ip_conntrack_cachep);
1476err_free_hash: 1537err_free_hash:
1477 free_conntrack_hash(); 1538 free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
1539 ip_conntrack_htable_size);
1478err_unreg_sockopt: 1540err_unreg_sockopt:
1479 nf_unregister_sockopt(&so_getorigdst); 1541 nf_unregister_sockopt(&so_getorigdst);
1480 1542
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index 79db5b70d5f6..926a6684643d 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -172,7 +172,6 @@ static int destroy_sibling_or_exp(const struct ip_conntrack_tuple *t)
172 DEBUGP("setting timeout of conntrack %p to 0\n", sibling); 172 DEBUGP("setting timeout of conntrack %p to 0\n", sibling);
173 sibling->proto.gre.timeout = 0; 173 sibling->proto.gre.timeout = 0;
174 sibling->proto.gre.stream_timeout = 0; 174 sibling->proto.gre.stream_timeout = 0;
175 /* refresh_acct will not modify counters if skb == NULL */
176 if (del_timer(&sibling->timeout)) 175 if (del_timer(&sibling->timeout))
177 sibling->timeout.function((unsigned long)sibling); 176 sibling->timeout.function((unsigned long)sibling);
178 ip_conntrack_put(sibling); 177 ip_conntrack_put(sibling);
@@ -223,8 +222,8 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct)
223static inline int 222static inline int
224exp_gre(struct ip_conntrack *master, 223exp_gre(struct ip_conntrack *master,
225 u_int32_t seq, 224 u_int32_t seq,
226 u_int16_t callid, 225 __be16 callid,
227 u_int16_t peer_callid) 226 __be16 peer_callid)
228{ 227{
229 struct ip_conntrack_tuple inv_tuple; 228 struct ip_conntrack_tuple inv_tuple;
230 struct ip_conntrack_tuple exp_tuples[] = { 229 struct ip_conntrack_tuple exp_tuples[] = {
@@ -263,7 +262,7 @@ exp_gre(struct ip_conntrack *master,
263 exp_orig->mask.src.ip = 0xffffffff; 262 exp_orig->mask.src.ip = 0xffffffff;
264 exp_orig->mask.src.u.all = 0; 263 exp_orig->mask.src.u.all = 0;
265 exp_orig->mask.dst.u.all = 0; 264 exp_orig->mask.dst.u.all = 0;
266 exp_orig->mask.dst.u.gre.key = 0xffff; 265 exp_orig->mask.dst.u.gre.key = htons(0xffff);
267 exp_orig->mask.dst.ip = 0xffffffff; 266 exp_orig->mask.dst.ip = 0xffffffff;
268 exp_orig->mask.dst.protonum = 0xff; 267 exp_orig->mask.dst.protonum = 0xff;
269 268
@@ -340,7 +339,8 @@ pptp_inbound_pkt(struct sk_buff **pskb,
340 unsigned int reqlen; 339 unsigned int reqlen;
341 union pptp_ctrl_union _pptpReq, *pptpReq; 340 union pptp_ctrl_union _pptpReq, *pptpReq;
342 struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; 341 struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
343 u_int16_t msg, *cid, *pcid; 342 u_int16_t msg;
343 __be16 *cid, *pcid;
344 u_int32_t seq; 344 u_int32_t seq;
345 345
346 ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); 346 ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
@@ -485,7 +485,7 @@ pptp_inbound_pkt(struct sk_buff **pskb,
485 485
486 if (info->pns_call_id != ntohs(*pcid)) { 486 if (info->pns_call_id != ntohs(*pcid)) {
487 DEBUGP("%s for unknown CallID %u\n", 487 DEBUGP("%s for unknown CallID %u\n",
488 pptp_msg_name[msg], ntohs(*cid)); 488 pptp_msg_name[msg], ntohs(*pcid));
489 break; 489 break;
490 } 490 }
491 491
@@ -551,7 +551,8 @@ pptp_outbound_pkt(struct sk_buff **pskb,
551 unsigned int reqlen; 551 unsigned int reqlen;
552 union pptp_ctrl_union _pptpReq, *pptpReq; 552 union pptp_ctrl_union _pptpReq, *pptpReq;
553 struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; 553 struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
554 u_int16_t msg, *cid, *pcid; 554 u_int16_t msg;
555 __be16 *cid, *pcid;
555 556
556 ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); 557 ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
557 if (!ctlh) 558 if (!ctlh)
@@ -755,7 +756,7 @@ static struct ip_conntrack_helper pptp = {
755 } 756 }
756 }, 757 },
757 .mask = { .src = { .ip = 0, 758 .mask = { .src = { .ip = 0,
758 .u = { .tcp = { .port = 0xffff } } 759 .u = { .tcp = { .port = __constant_htons(0xffff) } }
759 }, 760 },
760 .dst = { .ip = 0, 761 .dst = { .ip = 0,
761 .u = { .all = 0 }, 762 .u = { .all = 0 },
diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
index 71ef19d126d0..186646eb249f 100644
--- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
+++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
@@ -58,7 +58,7 @@ static int help(struct sk_buff **pskb,
58 goto out; 58 goto out;
59 59
60 rcu_read_lock(); 60 rcu_read_lock();
61 in_dev = __in_dev_get(rt->u.dst.dev); 61 in_dev = __in_dev_get_rcu(rt->u.dst.dev);
62 if (in_dev != NULL) { 62 if (in_dev != NULL) {
63 for_primary_ifa(in_dev) { 63 for_primary_ifa(in_dev) {
64 if (ifa->ifa_broadcast == iph->daddr) { 64 if (ifa->ifa_broadcast == iph->daddr) {
@@ -91,7 +91,7 @@ static int help(struct sk_buff **pskb,
91 ip_conntrack_expect_related(exp); 91 ip_conntrack_expect_related(exp);
92 ip_conntrack_expect_put(exp); 92 ip_conntrack_expect_put(exp);
93 93
94 ip_ct_refresh_acct(ct, ctinfo, NULL, timeout * HZ); 94 ip_ct_refresh(ct, *pskb, timeout * HZ);
95out: 95out:
96 return NF_ACCEPT; 96 return NF_ACCEPT;
97} 97}
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index b08a432efcf8..166e6069f121 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -177,11 +177,11 @@ ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct,
177 struct nfattr *nest_count = NFA_NEST(skb, type); 177 struct nfattr *nest_count = NFA_NEST(skb, type);
178 u_int64_t tmp; 178 u_int64_t tmp;
179 179
180 tmp = cpu_to_be64(ct->counters[dir].packets); 180 tmp = htonl(ct->counters[dir].packets);
181 NFA_PUT(skb, CTA_COUNTERS_PACKETS, sizeof(u_int64_t), &tmp); 181 NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp);
182 182
183 tmp = cpu_to_be64(ct->counters[dir].bytes); 183 tmp = htonl(ct->counters[dir].bytes);
184 NFA_PUT(skb, CTA_COUNTERS_BYTES, sizeof(u_int64_t), &tmp); 184 NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(u_int32_t), &tmp);
185 185
186 NFA_NEST_END(skb, nest_count); 186 NFA_NEST_END(skb, nest_count);
187 187
@@ -833,7 +833,8 @@ out:
833static inline int 833static inline int
834ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[]) 834ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[])
835{ 835{
836 unsigned long d, status = *(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1]); 836 unsigned long d;
837 unsigned status = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1]));
837 d = ct->status ^ status; 838 d = ct->status ^ status;
838 839
839 if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING)) 840 if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
@@ -948,6 +949,31 @@ ctnetlink_change_timeout(struct ip_conntrack *ct, struct nfattr *cda[])
948 return 0; 949 return 0;
949} 950}
950 951
952static inline int
953ctnetlink_change_protoinfo(struct ip_conntrack *ct, struct nfattr *cda[])
954{
955 struct nfattr *tb[CTA_PROTOINFO_MAX], *attr = cda[CTA_PROTOINFO-1];
956 struct ip_conntrack_protocol *proto;
957 u_int16_t npt = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
958 int err = 0;
959
960 if (nfattr_parse_nested(tb, CTA_PROTOINFO_MAX, attr) < 0)
961 goto nfattr_failure;
962
963 proto = ip_conntrack_proto_find_get(npt);
964 if (!proto)
965 return -EINVAL;
966
967 if (proto->from_nfattr)
968 err = proto->from_nfattr(tb, ct);
969 ip_conntrack_proto_put(proto);
970
971 return err;
972
973nfattr_failure:
974 return -ENOMEM;
975}
976
951static int 977static int
952ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[]) 978ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[])
953{ 979{
@@ -973,6 +999,12 @@ ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[])
973 return err; 999 return err;
974 } 1000 }
975 1001
1002 if (cda[CTA_PROTOINFO-1]) {
1003 err = ctnetlink_change_protoinfo(ct, cda);
1004 if (err < 0)
1005 return err;
1006 }
1007
976 DEBUGP("all done\n"); 1008 DEBUGP("all done\n");
977 return 0; 1009 return 0;
978} 1010}
@@ -1002,6 +1034,12 @@ ctnetlink_create_conntrack(struct nfattr *cda[],
1002 if (err < 0) 1034 if (err < 0)
1003 goto err; 1035 goto err;
1004 1036
1037 if (cda[CTA_PROTOINFO-1]) {
1038 err = ctnetlink_change_protoinfo(ct, cda);
1039 if (err < 0)
1040 return err;
1041 }
1042
1005 ct->helper = ip_conntrack_helper_find_get(rtuple); 1043 ct->helper = ip_conntrack_helper_find_get(rtuple);
1006 1044
1007 add_timer(&ct->timeout); 1045 add_timer(&ct->timeout);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
index de3cb9db6f85..744abb9d377a 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
@@ -247,6 +247,7 @@ static int gre_packet(struct ip_conntrack *ct,
247 ct->proto.gre.stream_timeout); 247 ct->proto.gre.stream_timeout);
248 /* Also, more likely to be important, and not a probe. */ 248 /* Also, more likely to be important, and not a probe. */
249 set_bit(IPS_ASSURED_BIT, &ct->status); 249 set_bit(IPS_ASSURED_BIT, &ct->status);
250 ip_conntrack_event_cache(IPCT_STATUS, skb);
250 } else 251 } else
251 ip_ct_refresh_acct(ct, conntrackinfo, skb, 252 ip_ct_refresh_acct(ct, conntrackinfo, skb,
252 ct->proto.gre.timeout); 253 ct->proto.gre.timeout);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
index 838d1d69b36e..98f0015dd255 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -296,8 +296,7 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[],
296 struct ip_conntrack_tuple *tuple) 296 struct ip_conntrack_tuple *tuple)
297{ 297{
298 if (!tb[CTA_PROTO_ICMP_TYPE-1] 298 if (!tb[CTA_PROTO_ICMP_TYPE-1]
299 || !tb[CTA_PROTO_ICMP_CODE-1] 299 || !tb[CTA_PROTO_ICMP_CODE-1])
300 || !tb[CTA_PROTO_ICMP_ID-1])
301 return -1; 300 return -1;
302 301
303 tuple->dst.u.icmp.type = 302 tuple->dst.u.icmp.type =
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
index a875f35e576d..59a4a0111dd3 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -416,6 +416,7 @@ static int sctp_packet(struct ip_conntrack *conntrack,
416 && newconntrack == SCTP_CONNTRACK_ESTABLISHED) { 416 && newconntrack == SCTP_CONNTRACK_ESTABLISHED) {
417 DEBUGP("Setting assured bit\n"); 417 DEBUGP("Setting assured bit\n");
418 set_bit(IPS_ASSURED_BIT, &conntrack->status); 418 set_bit(IPS_ASSURED_BIT, &conntrack->status);
419 ip_conntrack_event_cache(IPCT_STATUS, skb);
419 } 420 }
420 421
421 return NF_ACCEPT; 422 return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index 1985abc59d24..d6701cafbcc2 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -341,17 +341,43 @@ static int tcp_print_conntrack(struct seq_file *s,
341static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa, 341static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
342 const struct ip_conntrack *ct) 342 const struct ip_conntrack *ct)
343{ 343{
344 struct nfattr *nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
345
344 read_lock_bh(&tcp_lock); 346 read_lock_bh(&tcp_lock);
345 NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t), 347 NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
346 &ct->proto.tcp.state); 348 &ct->proto.tcp.state);
347 read_unlock_bh(&tcp_lock); 349 read_unlock_bh(&tcp_lock);
348 350
351 NFA_NEST_END(skb, nest_parms);
352
349 return 0; 353 return 0;
350 354
351nfattr_failure: 355nfattr_failure:
352 read_unlock_bh(&tcp_lock); 356 read_unlock_bh(&tcp_lock);
353 return -1; 357 return -1;
354} 358}
359
360static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct)
361{
362 struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1];
363 struct nfattr *tb[CTA_PROTOINFO_TCP_MAX];
364
365 if (nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr) < 0)
366 goto nfattr_failure;
367
368 if (!tb[CTA_PROTOINFO_TCP_STATE-1])
369 return -EINVAL;
370
371 write_lock_bh(&tcp_lock);
372 ct->proto.tcp.state =
373 *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]);
374 write_unlock_bh(&tcp_lock);
375
376 return 0;
377
378nfattr_failure:
379 return -1;
380}
355#endif 381#endif
356 382
357static unsigned int get_conntrack_index(const struct tcphdr *tcph) 383static unsigned int get_conntrack_index(const struct tcphdr *tcph)
@@ -1014,7 +1040,8 @@ static int tcp_packet(struct ip_conntrack *conntrack,
1014 /* Set ASSURED if we see see valid ack in ESTABLISHED 1040 /* Set ASSURED if we see see valid ack in ESTABLISHED
1015 after SYN_RECV or a valid answer for a picked up 1041 after SYN_RECV or a valid answer for a picked up
1016 connection. */ 1042 connection. */
1017 set_bit(IPS_ASSURED_BIT, &conntrack->status); 1043 set_bit(IPS_ASSURED_BIT, &conntrack->status);
1044 ip_conntrack_event_cache(IPCT_STATUS, skb);
1018 } 1045 }
1019 ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout); 1046 ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout);
1020 1047
@@ -1122,6 +1149,7 @@ struct ip_conntrack_protocol ip_conntrack_protocol_tcp =
1122#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ 1149#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
1123 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) 1150 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
1124 .to_nfattr = tcp_to_nfattr, 1151 .to_nfattr = tcp_to_nfattr,
1152 .from_nfattr = nfattr_to_tcp,
1125 .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, 1153 .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
1126 .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, 1154 .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
1127#endif 1155#endif
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index d3c7808010ec..dd476b191f4b 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -989,7 +989,7 @@ EXPORT_SYMBOL(need_ip_conntrack);
989EXPORT_SYMBOL(ip_conntrack_helper_register); 989EXPORT_SYMBOL(ip_conntrack_helper_register);
990EXPORT_SYMBOL(ip_conntrack_helper_unregister); 990EXPORT_SYMBOL(ip_conntrack_helper_unregister);
991EXPORT_SYMBOL(ip_ct_iterate_cleanup); 991EXPORT_SYMBOL(ip_ct_iterate_cleanup);
992EXPORT_SYMBOL(ip_ct_refresh_acct); 992EXPORT_SYMBOL(__ip_ct_refresh_acct);
993 993
994EXPORT_SYMBOL(ip_conntrack_expect_alloc); 994EXPORT_SYMBOL(ip_conntrack_expect_alloc);
995EXPORT_SYMBOL(ip_conntrack_expect_put); 995EXPORT_SYMBOL(ip_conntrack_expect_put);
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index c3ea891d38e7..c5e3abd24672 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -74,12 +74,14 @@ ip_nat_proto_find_get(u_int8_t protonum)
74 74
75 return p; 75 return p;
76} 76}
77EXPORT_SYMBOL_GPL(ip_nat_proto_find_get);
77 78
78void 79void
79ip_nat_proto_put(struct ip_nat_protocol *p) 80ip_nat_proto_put(struct ip_nat_protocol *p)
80{ 81{
81 module_put(p->me); 82 module_put(p->me);
82} 83}
84EXPORT_SYMBOL_GPL(ip_nat_proto_put);
83 85
84/* We keep an extra hash for each conntrack, for fast searching. */ 86/* We keep an extra hash for each conntrack, for fast searching. */
85static inline unsigned int 87static inline unsigned int
@@ -111,6 +113,7 @@ ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
111 return csum_fold(csum_partial((char *)diffs, sizeof(diffs), 113 return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
112 oldcheck^0xFFFF)); 114 oldcheck^0xFFFF));
113} 115}
116EXPORT_SYMBOL(ip_nat_cheat_check);
114 117
115/* Is this tuple already taken? (not by us) */ 118/* Is this tuple already taken? (not by us) */
116int 119int
@@ -127,6 +130,7 @@ ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
127 invert_tuplepr(&reply, tuple); 130 invert_tuplepr(&reply, tuple);
128 return ip_conntrack_tuple_taken(&reply, ignored_conntrack); 131 return ip_conntrack_tuple_taken(&reply, ignored_conntrack);
129} 132}
133EXPORT_SYMBOL(ip_nat_used_tuple);
130 134
131/* If we source map this tuple so reply looks like reply_tuple, will 135/* If we source map this tuple so reply looks like reply_tuple, will
132 * that meet the constraints of range. */ 136 * that meet the constraints of range. */
@@ -347,6 +351,7 @@ ip_nat_setup_info(struct ip_conntrack *conntrack,
347 351
348 return NF_ACCEPT; 352 return NF_ACCEPT;
349} 353}
354EXPORT_SYMBOL(ip_nat_setup_info);
350 355
351/* Returns true if succeeded. */ 356/* Returns true if succeeded. */
352static int 357static int
@@ -387,10 +392,10 @@ manip_pkt(u_int16_t proto,
387} 392}
388 393
389/* Do packet manipulations according to ip_nat_setup_info. */ 394/* Do packet manipulations according to ip_nat_setup_info. */
390unsigned int nat_packet(struct ip_conntrack *ct, 395unsigned int ip_nat_packet(struct ip_conntrack *ct,
391 enum ip_conntrack_info ctinfo, 396 enum ip_conntrack_info ctinfo,
392 unsigned int hooknum, 397 unsigned int hooknum,
393 struct sk_buff **pskb) 398 struct sk_buff **pskb)
394{ 399{
395 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 400 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
396 unsigned long statusbit; 401 unsigned long statusbit;
@@ -417,12 +422,13 @@ unsigned int nat_packet(struct ip_conntrack *ct,
417 } 422 }
418 return NF_ACCEPT; 423 return NF_ACCEPT;
419} 424}
425EXPORT_SYMBOL_GPL(ip_nat_packet);
420 426
421/* Dir is direction ICMP is coming from (opposite to packet it contains) */ 427/* Dir is direction ICMP is coming from (opposite to packet it contains) */
422int icmp_reply_translation(struct sk_buff **pskb, 428int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
423 struct ip_conntrack *ct, 429 struct ip_conntrack *ct,
424 enum ip_nat_manip_type manip, 430 enum ip_nat_manip_type manip,
425 enum ip_conntrack_dir dir) 431 enum ip_conntrack_dir dir)
426{ 432{
427 struct { 433 struct {
428 struct icmphdr icmp; 434 struct icmphdr icmp;
@@ -509,6 +515,7 @@ int icmp_reply_translation(struct sk_buff **pskb,
509 515
510 return 1; 516 return 1;
511} 517}
518EXPORT_SYMBOL_GPL(ip_nat_icmp_reply_translation);
512 519
513/* Protocol registration. */ 520/* Protocol registration. */
514int ip_nat_protocol_register(struct ip_nat_protocol *proto) 521int ip_nat_protocol_register(struct ip_nat_protocol *proto)
@@ -525,6 +532,7 @@ int ip_nat_protocol_register(struct ip_nat_protocol *proto)
525 write_unlock_bh(&ip_nat_lock); 532 write_unlock_bh(&ip_nat_lock);
526 return ret; 533 return ret;
527} 534}
535EXPORT_SYMBOL(ip_nat_protocol_register);
528 536
529/* Noone stores the protocol anywhere; simply delete it. */ 537/* Noone stores the protocol anywhere; simply delete it. */
530void ip_nat_protocol_unregister(struct ip_nat_protocol *proto) 538void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
@@ -536,6 +544,7 @@ void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
536 /* Someone could be still looking at the proto in a bh. */ 544 /* Someone could be still looking at the proto in a bh. */
537 synchronize_net(); 545 synchronize_net();
538} 546}
547EXPORT_SYMBOL(ip_nat_protocol_unregister);
539 548
540#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ 549#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
541 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) 550 defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
@@ -582,7 +591,7 @@ EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_range);
582EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr); 591EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr);
583#endif 592#endif
584 593
585int __init ip_nat_init(void) 594static int __init ip_nat_init(void)
586{ 595{
587 size_t i; 596 size_t i;
588 597
@@ -624,10 +633,14 @@ static int clean_nat(struct ip_conntrack *i, void *data)
624 return 0; 633 return 0;
625} 634}
626 635
627/* Not __exit: called from ip_nat_standalone.c:init_or_cleanup() --RR */ 636static void __exit ip_nat_cleanup(void)
628void ip_nat_cleanup(void)
629{ 637{
630 ip_ct_iterate_cleanup(&clean_nat, NULL); 638 ip_ct_iterate_cleanup(&clean_nat, NULL);
631 ip_conntrack_destroyed = NULL; 639 ip_conntrack_destroyed = NULL;
632 vfree(bysource); 640 vfree(bysource);
633} 641}
642
643MODULE_LICENSE("GPL");
644
645module_init(ip_nat_init);
646module_exit(ip_nat_cleanup);
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
index d2dd5d313556..5d506e0564d5 100644
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ b/net/ipv4/netfilter/ip_nat_helper.c
@@ -199,6 +199,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
199 } 199 }
200 return 1; 200 return 1;
201} 201}
202EXPORT_SYMBOL(ip_nat_mangle_tcp_packet);
202 203
203/* Generic function for mangling variable-length address changes inside 204/* Generic function for mangling variable-length address changes inside
204 * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX 205 * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX
@@ -256,6 +257,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb,
256 257
257 return 1; 258 return 1;
258} 259}
260EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
259 261
260/* Adjust one found SACK option including checksum correction */ 262/* Adjust one found SACK option including checksum correction */
261static void 263static void
@@ -399,6 +401,7 @@ ip_nat_seq_adjust(struct sk_buff **pskb,
399 401
400 return 1; 402 return 1;
401} 403}
404EXPORT_SYMBOL(ip_nat_seq_adjust);
402 405
403/* Setup NAT on this expected conntrack so it follows master. */ 406/* Setup NAT on this expected conntrack so it follows master. */
404/* If we fail to get a free NAT slot, we'll get dropped on confirm */ 407/* If we fail to get a free NAT slot, we'll get dropped on confirm */
@@ -425,3 +428,4 @@ void ip_nat_follow_master(struct ip_conntrack *ct,
425 /* hook doesn't matter, but it has to do destination manip */ 428 /* hook doesn't matter, but it has to do destination manip */
426 ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING); 429 ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
427} 430}
431EXPORT_SYMBOL(ip_nat_follow_master);
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 0ff368b131f6..30cd4e18c129 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -108,8 +108,8 @@ ip_nat_fn(unsigned int hooknum,
108 case IP_CT_RELATED: 108 case IP_CT_RELATED:
109 case IP_CT_RELATED+IP_CT_IS_REPLY: 109 case IP_CT_RELATED+IP_CT_IS_REPLY:
110 if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { 110 if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
111 if (!icmp_reply_translation(pskb, ct, maniptype, 111 if (!ip_nat_icmp_reply_translation(pskb, ct, maniptype,
112 CTINFO2DIR(ctinfo))) 112 CTINFO2DIR(ctinfo)))
113 return NF_DROP; 113 return NF_DROP;
114 else 114 else
115 return NF_ACCEPT; 115 return NF_ACCEPT;
@@ -152,7 +152,7 @@ ip_nat_fn(unsigned int hooknum,
152 } 152 }
153 153
154 IP_NF_ASSERT(info); 154 IP_NF_ASSERT(info);
155 return nat_packet(ct, ctinfo, hooknum, pskb); 155 return ip_nat_packet(ct, ctinfo, hooknum, pskb);
156} 156}
157 157
158static unsigned int 158static unsigned int
@@ -325,15 +325,10 @@ static int init_or_cleanup(int init)
325 printk("ip_nat_init: can't setup rules.\n"); 325 printk("ip_nat_init: can't setup rules.\n");
326 goto cleanup_nothing; 326 goto cleanup_nothing;
327 } 327 }
328 ret = ip_nat_init();
329 if (ret < 0) {
330 printk("ip_nat_init: can't setup rules.\n");
331 goto cleanup_rule_init;
332 }
333 ret = nf_register_hook(&ip_nat_in_ops); 328 ret = nf_register_hook(&ip_nat_in_ops);
334 if (ret < 0) { 329 if (ret < 0) {
335 printk("ip_nat_init: can't register in hook.\n"); 330 printk("ip_nat_init: can't register in hook.\n");
336 goto cleanup_nat; 331 goto cleanup_rule_init;
337 } 332 }
338 ret = nf_register_hook(&ip_nat_out_ops); 333 ret = nf_register_hook(&ip_nat_out_ops);
339 if (ret < 0) { 334 if (ret < 0) {
@@ -374,8 +369,6 @@ static int init_or_cleanup(int init)
374 nf_unregister_hook(&ip_nat_out_ops); 369 nf_unregister_hook(&ip_nat_out_ops);
375 cleanup_inops: 370 cleanup_inops:
376 nf_unregister_hook(&ip_nat_in_ops); 371 nf_unregister_hook(&ip_nat_in_ops);
377 cleanup_nat:
378 ip_nat_cleanup();
379 cleanup_rule_init: 372 cleanup_rule_init:
380 ip_nat_rule_cleanup(); 373 ip_nat_rule_cleanup();
381 cleanup_nothing: 374 cleanup_nothing:
@@ -395,14 +388,4 @@ static void __exit fini(void)
395module_init(init); 388module_init(init);
396module_exit(fini); 389module_exit(fini);
397 390
398EXPORT_SYMBOL(ip_nat_setup_info);
399EXPORT_SYMBOL(ip_nat_protocol_register);
400EXPORT_SYMBOL(ip_nat_protocol_unregister);
401EXPORT_SYMBOL_GPL(ip_nat_proto_find_get);
402EXPORT_SYMBOL_GPL(ip_nat_proto_put);
403EXPORT_SYMBOL(ip_nat_cheat_check);
404EXPORT_SYMBOL(ip_nat_mangle_tcp_packet);
405EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
406EXPORT_SYMBOL(ip_nat_used_tuple);
407EXPORT_SYMBOL(ip_nat_follow_master);
408MODULE_LICENSE("GPL"); 391MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index d54f14d926f6..36339eb39e17 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -240,8 +240,8 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
240 240
241 pmsg->packet_id = (unsigned long )entry; 241 pmsg->packet_id = (unsigned long )entry;
242 pmsg->data_len = data_len; 242 pmsg->data_len = data_len;
243 pmsg->timestamp_sec = skb_tv_base.tv_sec + entry->skb->tstamp.off_sec; 243 pmsg->timestamp_sec = entry->skb->tstamp.off_sec;
244 pmsg->timestamp_usec = skb_tv_base.tv_usec + entry->skb->tstamp.off_usec; 244 pmsg->timestamp_usec = entry->skb->tstamp.off_usec;
245 pmsg->mark = entry->skb->nfmark; 245 pmsg->mark = entry->skb->nfmark;
246 pmsg->hook = entry->info->hook; 246 pmsg->hook = entry->info->hook;
247 pmsg->hw_protocol = entry->skb->protocol; 247 pmsg->hw_protocol = entry->skb->protocol;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index eef99a1b5de6..75c27e92f6ab 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -27,6 +27,7 @@
27#include <asm/semaphore.h> 27#include <asm/semaphore.h>
28#include <linux/proc_fs.h> 28#include <linux/proc_fs.h>
29#include <linux/err.h> 29#include <linux/err.h>
30#include <linux/cpumask.h>
30 31
31#include <linux/netfilter_ipv4/ip_tables.h> 32#include <linux/netfilter_ipv4/ip_tables.h>
32 33
@@ -921,8 +922,10 @@ translate_table(const char *name,
921 } 922 }
922 923
923 /* And one copy for every other CPU */ 924 /* And one copy for every other CPU */
924 for (i = 1; i < num_possible_cpus(); i++) { 925 for_each_cpu(i) {
925 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i, 926 if (i == 0)
927 continue;
928 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
926 newinfo->entries, 929 newinfo->entries,
927 SMP_ALIGN(newinfo->size)); 930 SMP_ALIGN(newinfo->size));
928 } 931 }
@@ -943,7 +946,7 @@ replace_table(struct ipt_table *table,
943 struct ipt_entry *table_base; 946 struct ipt_entry *table_base;
944 unsigned int i; 947 unsigned int i;
945 948
946 for (i = 0; i < num_possible_cpus(); i++) { 949 for_each_cpu(i) {
947 table_base = 950 table_base =
948 (void *)newinfo->entries 951 (void *)newinfo->entries
949 + TABLE_OFFSET(newinfo, i); 952 + TABLE_OFFSET(newinfo, i);
@@ -990,7 +993,7 @@ get_counters(const struct ipt_table_info *t,
990 unsigned int cpu; 993 unsigned int cpu;
991 unsigned int i; 994 unsigned int i;
992 995
993 for (cpu = 0; cpu < num_possible_cpus(); cpu++) { 996 for_each_cpu(cpu) {
994 i = 0; 997 i = 0;
995 IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu), 998 IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
996 t->size, 999 t->size,
@@ -1128,7 +1131,8 @@ do_replace(void __user *user, unsigned int len)
1128 return -ENOMEM; 1131 return -ENOMEM;
1129 1132
1130 newinfo = vmalloc(sizeof(struct ipt_table_info) 1133 newinfo = vmalloc(sizeof(struct ipt_table_info)
1131 + SMP_ALIGN(tmp.size) * num_possible_cpus()); 1134 + SMP_ALIGN(tmp.size) *
1135 (highest_possible_processor_id()+1));
1132 if (!newinfo) 1136 if (!newinfo)
1133 return -ENOMEM; 1137 return -ENOMEM;
1134 1138
@@ -1458,7 +1462,8 @@ int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
1458 = { 0, 0, 0, { 0 }, { 0 }, { } }; 1462 = { 0, 0, 0, { 0 }, { 0 }, { } };
1459 1463
1460 newinfo = vmalloc(sizeof(struct ipt_table_info) 1464 newinfo = vmalloc(sizeof(struct ipt_table_info)
1461 + SMP_ALIGN(repl->size) * num_possible_cpus()); 1465 + SMP_ALIGN(repl->size) *
1466 (highest_possible_processor_id()+1));
1462 if (!newinfo) 1467 if (!newinfo)
1463 return -ENOMEM; 1468 return -ENOMEM;
1464 1469
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 715cb613405c..5245bfd33d52 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -93,7 +93,7 @@ redirect_target(struct sk_buff **pskb,
93 newdst = 0; 93 newdst = 0;
94 94
95 rcu_read_lock(); 95 rcu_read_lock();
96 indev = __in_dev_get((*pskb)->dev); 96 indev = __in_dev_get_rcu((*pskb)->dev);
97 if (indev && (ifa = indev->ifa_list)) 97 if (indev && (ifa = indev->ifa_list))
98 newdst = ifa->ifa_local; 98 newdst = ifa->ifa_local;
99 rcu_read_unlock(); 99 rcu_read_unlock();
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index e2c14f3cb2fc..2883ccd8a91d 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -225,8 +225,8 @@ static void ipt_ulog_packet(unsigned int hooknum,
225 225
226 /* copy hook, prefix, timestamp, payload, etc. */ 226 /* copy hook, prefix, timestamp, payload, etc. */
227 pm->data_len = copy_len; 227 pm->data_len = copy_len;
228 pm->timestamp_sec = skb_tv_base.tv_sec + skb->tstamp.off_sec; 228 pm->timestamp_sec = skb->tstamp.off_sec;
229 pm->timestamp_usec = skb_tv_base.tv_usec + skb->tstamp.off_usec; 229 pm->timestamp_usec = skb->tstamp.off_usec;
230 pm->mark = skb->nfmark; 230 pm->mark = skb->nfmark;
231 pm->hook = hooknum; 231 pm->hook = hooknum;
232 if (prefix != NULL) 232 if (prefix != NULL)
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index f7943ba1f43c..a65e508fbd40 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -90,9 +90,7 @@ fold_field(void *mib[], int offt)
90 unsigned long res = 0; 90 unsigned long res = 0;
91 int i; 91 int i;
92 92
93 for (i = 0; i < NR_CPUS; i++) { 93 for_each_cpu(i) {
94 if (!cpu_possible(i))
95 continue;
96 res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt); 94 res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt);
97 res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt); 95 res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt);
98 } 96 }
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8549f26e2495..381dd6a6aebb 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2128,7 +2128,7 @@ int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr,
2128 struct in_device *in_dev; 2128 struct in_device *in_dev;
2129 2129
2130 rcu_read_lock(); 2130 rcu_read_lock();
2131 if ((in_dev = __in_dev_get(dev)) != NULL) { 2131 if ((in_dev = __in_dev_get_rcu(dev)) != NULL) {
2132 int our = ip_check_mc(in_dev, daddr, saddr, 2132 int our = ip_check_mc(in_dev, daddr, saddr,
2133 skb->nh.iph->protocol); 2133 skb->nh.iph->protocol);
2134 if (our 2134 if (our
@@ -2443,7 +2443,9 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
2443 err = -ENODEV; 2443 err = -ENODEV;
2444 if (dev_out == NULL) 2444 if (dev_out == NULL)
2445 goto out; 2445 goto out;
2446 if (__in_dev_get(dev_out) == NULL) { 2446
2447 /* RACE: Check return value of inet_select_addr instead. */
2448 if (__in_dev_get_rtnl(dev_out) == NULL) {
2447 dev_put(dev_out); 2449 dev_put(dev_out);
2448 goto out; /* Wrong error code */ 2450 goto out; /* Wrong error code */
2449 } 2451 }
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index b940346de4e7..6d80e063c187 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -136,7 +136,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
136 else if (cwnd < ca->last_max_cwnd + max_increment*(BICTCP_B-1)) 136 else if (cwnd < ca->last_max_cwnd + max_increment*(BICTCP_B-1))
137 /* slow start */ 137 /* slow start */
138 ca->cnt = (cwnd * (BICTCP_B-1)) 138 ca->cnt = (cwnd * (BICTCP_B-1))
139 / cwnd-ca->last_max_cwnd; 139 / (cwnd - ca->last_max_cwnd);
140 else 140 else
141 /* linear increase */ 141 /* linear increase */
142 ca->cnt = cwnd / max_increment; 142 ca->cnt = cwnd / max_increment;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a7537c7bbd06..3e98b57578dc 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -355,8 +355,6 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
355 app_win -= icsk->icsk_ack.rcv_mss; 355 app_win -= icsk->icsk_ack.rcv_mss;
356 app_win = max(app_win, 2U*tp->advmss); 356 app_win = max(app_win, 2U*tp->advmss);
357 357
358 if (!ofo_win)
359 tp->window_clamp = min(tp->window_clamp, app_win);
360 tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss); 358 tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss);
361 } 359 }
362} 360}
@@ -2241,6 +2239,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
2241 /* Note, it is the only place, where 2239 /* Note, it is the only place, where
2242 * fast path is recovered for sending TCP. 2240 * fast path is recovered for sending TCP.
2243 */ 2241 */
2242 tp->pred_flags = 0;
2244 tcp_fast_path_check(sk, tp); 2243 tcp_fast_path_check(sk, tp);
2245 2244
2246 if (nwin > tp->max_window) { 2245 if (nwin > tp->max_window) {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 13dfb391cdf1..c85819d8474b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -130,19 +130,20 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
130 int dif = sk->sk_bound_dev_if; 130 int dif = sk->sk_bound_dev_if;
131 INET_ADDR_COOKIE(acookie, saddr, daddr) 131 INET_ADDR_COOKIE(acookie, saddr, daddr)
132 const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); 132 const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
133 const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, tcp_hashinfo.ehash_size); 133 unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
134 struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; 134 struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
135 struct sock *sk2; 135 struct sock *sk2;
136 const struct hlist_node *node; 136 const struct hlist_node *node;
137 struct inet_timewait_sock *tw; 137 struct inet_timewait_sock *tw;
138 138
139 prefetch(head->chain.first);
139 write_lock(&head->lock); 140 write_lock(&head->lock);
140 141
141 /* Check TIME-WAIT sockets first. */ 142 /* Check TIME-WAIT sockets first. */
142 sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { 143 sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
143 tw = inet_twsk(sk2); 144 tw = inet_twsk(sk2);
144 145
145 if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) { 146 if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
146 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2); 147 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
147 struct tcp_sock *tp = tcp_sk(sk); 148 struct tcp_sock *tp = tcp_sk(sk);
148 149
@@ -179,7 +180,7 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
179 180
180 /* And established part... */ 181 /* And established part... */
181 sk_for_each(sk2, node, &head->chain) { 182 sk_for_each(sk2, node, &head->chain) {
182 if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif)) 183 if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
183 goto not_unique; 184 goto not_unique;
184 } 185 }
185 186
@@ -188,7 +189,7 @@ unique:
188 * in hash table socket with a funny identity. */ 189 * in hash table socket with a funny identity. */
189 inet->num = lport; 190 inet->num = lport;
190 inet->sport = htons(lport); 191 inet->sport = htons(lport);
191 sk->sk_hashent = hash; 192 sk->sk_hash = hash;
192 BUG_TRAP(sk_unhashed(sk)); 193 BUG_TRAP(sk_unhashed(sk));
193 __sk_add_node(sk, &head->chain); 194 __sk_add_node(sk, &head->chain);
194 sock_prot_inc_use(sk->sk_prot); 195 sock_prot_inc_use(sk->sk_prot);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5dd6dd7d091e..b907456a79f4 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -190,7 +190,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
190 } 190 }
191 191
192 /* Set initial window to value enough for senders, 192 /* Set initial window to value enough for senders,
193 * following RFC1414. Senders, not following this RFC, 193 * following RFC2414. Senders, not following this RFC,
194 * will be satisfied with 2. 194 * will be satisfied with 2.
195 */ 195 */
196 if (mss > (1<<*rcv_wscale)) { 196 if (mss > (1<<*rcv_wscale)) {
@@ -435,8 +435,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
435 int nsize, old_factor; 435 int nsize, old_factor;
436 u16 flags; 436 u16 flags;
437 437
438 BUG_ON(len >= skb->len); 438 BUG_ON(len > skb->len);
439
440 nsize = skb_headlen(skb) - len; 439 nsize = skb_headlen(skb) - len;
441 if (nsize < 0) 440 if (nsize < 0)
442 nsize = 0; 441 nsize = 0;
@@ -509,7 +508,16 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
509 tp->lost_out -= diff; 508 tp->lost_out -= diff;
510 tp->left_out -= diff; 509 tp->left_out -= diff;
511 } 510 }
511
512 if (diff > 0) { 512 if (diff > 0) {
513 /* Adjust Reno SACK estimate. */
514 if (!tp->rx_opt.sack_ok) {
515 tp->sacked_out -= diff;
516 if ((int)tp->sacked_out < 0)
517 tp->sacked_out = 0;
518 tcp_sync_left_out(tp);
519 }
520
513 tp->fackets_out -= diff; 521 tp->fackets_out -= diff;
514 if ((int)tp->fackets_out < 0) 522 if ((int)tp->fackets_out < 0)
515 tp->fackets_out = 0; 523 tp->fackets_out = 0;
@@ -1601,7 +1609,7 @@ void tcp_send_fin(struct sock *sk)
1601 * was unread data in the receive queue. This behavior is recommended 1609 * was unread data in the receive queue. This behavior is recommended
1602 * by draft-ietf-tcpimpl-prob-03.txt section 3.10. -DaveM 1610 * by draft-ietf-tcpimpl-prob-03.txt section 3.10. -DaveM
1603 */ 1611 */
1604void tcp_send_active_reset(struct sock *sk, unsigned int __nocast priority) 1612void tcp_send_active_reset(struct sock *sk, gfp_t priority)
1605{ 1613{
1606 struct tcp_sock *tp = tcp_sk(sk); 1614 struct tcp_sock *tp = tcp_sk(sk);
1607 struct sk_buff *skb; 1615 struct sk_buff *skb;