Diffstat (limited to 'net')
-rw-r--r--  net/8021q/vlan.c                                |   6
-rw-r--r--  net/8021q/vlan_dev.c                            |   3
-rw-r--r--  net/bridge/br_netfilter.c                       |  19
-rw-r--r--  net/core/dev.c                                  |   3
-rw-r--r--  net/core/filter.c                               |   4
-rw-r--r--  net/core/skbuff.c                               |   2
-rw-r--r--  net/dccp/ipv4.c                                 |   2
-rw-r--r--  net/decnet/af_decnet.c                          |  25
-rw-r--r--  net/decnet/sysctl_net_decnet.c                  |  33
-rw-r--r--  net/ieee80211/Kconfig                           |   2
-rw-r--r--  net/ipv4/ip_gre.c                               |   2
-rw-r--r--  net/ipv4/netfilter/Kconfig                      |   8
-rw-r--r--  net/ipv4/netfilter/Makefile                     |   3
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_core.c          |  20
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_netlink.c       |  12
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_proto_tcp.c     |   3
-rw-r--r--  net/ipv4/netfilter/ip_nat_tftp.c                |   5
-rw-r--r--  net/ipv4/tcp_output.c                           | 233
-rw-r--r--  net/ipv4/tcp_vegas.c                            |  16
-rw-r--r--  net/ipv4/xfrm4_policy.c                         |   1
-rw-r--r--  net/ipv6/addrconf.c                             | 137
-rw-r--r--  net/ipv6/esp6.c                                 |   2
-rw-r--r--  net/ipv6/icmp.c                                 |  16
-rw-r--r--  net/ipv6/mcast.c                                | 142
-rw-r--r--  net/ipv6/netfilter/Kconfig                      |   2
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c  |  12
-rw-r--r--  net/ipv6/route.c                                |  15
-rw-r--r--  net/ipv6/tcp_ipv6.c                             |  19
-rw-r--r--  net/ipv6/xfrm6_policy.c                         |   1
-rw-r--r--  net/netfilter/Kconfig                           |   4
-rw-r--r--  net/netfilter/nf_conntrack_core.c               |   3
-rw-r--r--  net/netfilter/nfnetlink.c                       |   5
-rw-r--r--  net/netrom/nr_in.c                              |   6
-rw-r--r--  net/packet/af_packet.c                          | 115
-rw-r--r--  net/sched/act_api.c                             |   2
-rw-r--r--  net/sctp/socket.c                               |  16
-rw-r--r--  net/sunrpc/auth_gss/auth_gss.c                  |   6
-rw-r--r--  net/sunrpc/rpc_pipe.c                           |   4
-rw-r--r--  net/sunrpc/xprtsock.c                           |   2
-rw-r--r--  net/xfrm/xfrm_policy.c                          |  49
-rw-r--r--  net/xfrm/xfrm_state.c                           |   5
41 files changed, 639 insertions, 326 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 91e412b0ab00..67465b65abe4 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -753,6 +753,8 @@ static int vlan_ioctl_handler(void __user *arg)
 		break;
 	case GET_VLAN_REALDEV_NAME_CMD:
 		err = vlan_dev_get_realdev_name(args.device1, args.u.device2);
+		if (err)
+			goto out;
 		if (copy_to_user(arg, &args,
 				 sizeof(struct vlan_ioctl_args))) {
 			err = -EFAULT;
@@ -761,6 +763,8 @@ static int vlan_ioctl_handler(void __user *arg)
 
 	case GET_VLAN_VID_CMD:
 		err = vlan_dev_get_vid(args.device1, &vid);
+		if (err)
+			goto out;
 		args.u.VID = vid;
 		if (copy_to_user(arg, &args,
 				 sizeof(struct vlan_ioctl_args))) {
@@ -774,7 +778,7 @@ static int vlan_ioctl_handler(void __user *arg)
 		       __FUNCTION__, args.cmd);
 		return -EINVAL;
 	};
-
+out:
 	return err;
 }
 
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index b74864889670..f2a8750bbf1d 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -165,6 +165,9 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 
 	skb_pull(skb, VLAN_HLEN); /* take off the VLAN header (4 bytes currently) */
 
+	/* Need to correct hardware checksum */
+	skb_postpull_rcsum(skb, vhdr, VLAN_HLEN);
+
 	/* Ok, lets check to make sure the device (dev) we
 	 * came in on is what this VLAN is attached to.
 	 */
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index d8e36b775125..23422bd53a5e 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -295,7 +295,7 @@ static int check_hbh_len(struct sk_buff *skb)
 	len -= 2;
 
 	while (len > 0) {
-		int optlen = raw[off+1]+2;
+		int optlen = skb->nh.raw[off+1]+2;
 
 		switch (skb->nh.raw[off]) {
 		case IPV6_TLV_PAD0:
@@ -308,18 +308,15 @@ static int check_hbh_len(struct sk_buff *skb)
 		case IPV6_TLV_JUMBO:
 			if (skb->nh.raw[off+1] != 4 || (off&3) != 2)
 				goto bad;
-
 			pkt_len = ntohl(*(u32*)(skb->nh.raw+off+2));
-
+			if (pkt_len <= IPV6_MAXPLEN ||
+			    skb->nh.ipv6h->payload_len)
+				goto bad;
 			if (pkt_len > skb->len - sizeof(struct ipv6hdr))
 				goto bad;
-			if (pkt_len + sizeof(struct ipv6hdr) < skb->len) {
-				if (__pskb_trim(skb,
-						pkt_len + sizeof(struct ipv6hdr)))
-					goto bad;
-				if (skb->ip_summed == CHECKSUM_HW)
-					skb->ip_summed = CHECKSUM_NONE;
-			}
+			if (pskb_trim_rcsum(skb,
+					    pkt_len+sizeof(struct ipv6hdr)))
+				goto bad;
 			break;
 		default:
 			if (optlen > len)
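Note: the hop-by-hop/jumbo validation above is compact enough to model outside the
kernel. A minimal standalone sketch of the RFC 2675 checks (IPV6_MAXPLEN matches the
kernel constant; the buffer layout and all names here are invented for illustration):

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

#define IPV6_MAXPLEN 65535

/* nh points at the IPv6 header, off at the jumbo TLV inside the
 * hop-by-hop header.  RFC 2675: option length must be 4, the 32-bit
 * jumbo length must start on a 4-byte boundary ((off & 3) == 2 puts
 * it at off + 2), it must exceed 65535, and the regular payload_len
 * field must be zero. */
static int jumbo_ok(const uint8_t *nh, int off, uint16_t payload_len,
		    uint32_t *pkt_len)
{
	uint32_t v;

	if (nh[off + 1] != 4 || (off & 3) != 2)
		return 0;
	memcpy(&v, nh + off + 2, sizeof(v));	/* unaligned-safe read */
	*pkt_len = ntohl(v);
	if (*pkt_len <= IPV6_MAXPLEN || payload_len)
		return 0;
	return 1;
}

int main(void)
{
	uint8_t pkt[48] = { 0 };	/* 40-byte IPv6 header + hbh header */
	uint32_t jumbo = htonl(70000), pkt_len = 0;

	pkt[42] = 0xc2;			/* IPV6_TLV_JUMBO */
	pkt[43] = 4;			/* option data length */
	memcpy(&pkt[44], &jumbo, 4);	/* jumbo payload length */

	printf("valid: %d, pkt_len: %u\n",
	       jumbo_ok(pkt, 42, 0, &pkt_len), pkt_len);
	return 0;
}
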
@@ -372,6 +369,7 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
 	if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb))
 		goto inhdr_error;
 
+	nf_bridge_put(skb->nf_bridge);
 	if ((nf_bridge = nf_bridge_alloc(skb)) == NULL)
 		return NF_DROP;
 	setup_pre_routing(skb);
@@ -455,6 +453,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
 		skb->ip_summed = CHECKSUM_NONE;
 	}
 
+	nf_bridge_put(skb->nf_bridge);
 	if ((nf_bridge = nf_bridge_alloc(skb)) == NULL)
 		return NF_DROP;
 	setup_pre_routing(skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index 0b48e294aafe..a5efc9ae010b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1113,7 +1113,8 @@ out:
 void netdev_rx_csum_fault(struct net_device *dev)
 {
 	if (net_ratelimit()) {
-		printk(KERN_ERR "%s: hw csum failure.\n", dev->name);
+		printk(KERN_ERR "%s: hw csum failure.\n",
+			dev ? dev->name : "<unknown>");
 		dump_stack();
 	}
 }
diff --git a/net/core/filter.c b/net/core/filter.c
index 2841bfce29d6..3a10e0bc90e8 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -293,7 +293,7 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 	struct sock_filter *ftest;
 	int pc;
 
-	if (((unsigned int)flen >= (~0U / sizeof(struct sock_filter))) || flen == 0)
+	if (flen == 0 || flen > BPF_MAXINSNS)
 		return -EINVAL;
 
 	/* check the filter code now */
@@ -360,7 +360,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 	int err;
 
 	/* Make sure new filter is there and in the right amounts. */
-	if (fprog->filter == NULL || fprog->len > BPF_MAXINSNS)
+	if (fprog->filter == NULL)
 		return -EINVAL;
 
 	fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
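Note: the two filter.c hunks move the instruction-count bound into sk_chk_filter(),
which now rejects flen == 0 or flen > BPF_MAXINSNS (4096) for every caller, so
sk_attach_filter() no longer duplicates the check. A minimal userspace sketch of the
attach path that funnels into sk_chk_filter() (the socket type and the one-instruction
program are invented for illustration):

#include <linux/filter.h>
#include <stdio.h>
#include <sys/socket.h>

static int attach_accept_all(int fd)
{
	/* BPF_RET | BPF_K: accept the whole packet */
	static struct sock_filter insns[] = {
		{ 0x06, 0, 0, 0xffffffff },
	};
	struct sock_fprog prog = {
		.len	= sizeof(insns) / sizeof(insns[0]),
		.filter	= insns,
	};

	return setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
			  &prog, sizeof(prog));
}

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0 || attach_accept_all(fd) < 0) {
		perror("filter");
		return 1;
	}
	puts("filter attached");
	return 0;
}
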
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b7d13a4fff48..83fee37de38e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1725,7 +1725,7 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
  * of the skb if any page alloc fails user this procedure returns -ENOMEM
  */
 int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
-			int getfrag(void *from, char *to, int offset,
+			int (*getfrag)(void *from, char *to, int offset,
 					int len, int odd, struct sk_buff *skb),
 			void *from, int length)
 {
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index ca03521112c5..656e13e38cfb 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -1251,7 +1251,7 @@ static int dccp_v4_destroy_sock(struct sock *sk)
 	struct dccp_sock *dp = dccp_sk(sk);
 
 	/*
-	 * DCCP doesn't use sk_qrite_queue, just sk_send_head
+	 * DCCP doesn't use sk_write_queue, just sk_send_head
 	 * for retransmissions
 	 */
 	if (sk->sk_send_head != NULL) {
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index f89e55f814d9..d402e9020c68 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -153,6 +153,7 @@ static struct proto_ops dn_proto_ops;
 static DEFINE_RWLOCK(dn_hash_lock);
 static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE];
 static struct hlist_head dn_wild_sk;
+static atomic_t decnet_memory_allocated;
 
 static int __dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen, int flags);
 static int __dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen, int flags);
@@ -446,10 +447,26 @@ static void dn_destruct(struct sock *sk)
 	dst_release(xchg(&sk->sk_dst_cache, NULL));
 }
 
+static int dn_memory_pressure;
+
+static void dn_enter_memory_pressure(void)
+{
+	if (!dn_memory_pressure) {
+		dn_memory_pressure = 1;
+	}
+}
+
 static struct proto dn_proto = {
-	.name			= "DECNET",
+	.name			= "NSP",
 	.owner			= THIS_MODULE,
-	.obj_size		= sizeof(struct dn_sock),
+	.enter_memory_pressure	= dn_enter_memory_pressure,
+	.memory_pressure	= &dn_memory_pressure,
+	.memory_allocated	= &decnet_memory_allocated,
+	.sysctl_mem		= sysctl_decnet_mem,
+	.sysctl_wmem		= sysctl_decnet_wmem,
+	.sysctl_rmem		= sysctl_decnet_rmem,
+	.max_header		= DN_MAX_NSP_DATA_HEADER + 64,
+	.obj_size		= sizeof(struct dn_sock),
 };
 
 static struct sock *dn_alloc_sock(struct socket *sock, gfp_t gfp)
@@ -470,6 +487,8 @@ static struct sock *dn_alloc_sock(struct socket *sock, gfp_t gfp)
 	sk->sk_family      = PF_DECnet;
 	sk->sk_protocol    = 0;
 	sk->sk_allocation  = gfp;
+	sk->sk_sndbuf	   = sysctl_decnet_wmem[1];
+	sk->sk_rcvbuf	   = sysctl_decnet_rmem[1];
 
 	/* Initialization of DECnet Session Control Port		*/
 	scp = DN_SK(sk);
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index 02bca49cb508..0e9d2c571165 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -10,6 +10,7 @@
  *
  * Changes:
  *  Steve Whitehouse - C99 changes and default device handling
+ *  Steve Whitehouse - Memory buffer settings, like the tcp ones
  *
  */
 #include <linux/config.h>
@@ -37,6 +38,11 @@ int decnet_dr_count = 3;
 int decnet_log_martians = 1;
 int decnet_no_fc_max_cwnd = NSP_MIN_WINDOW;
 
+/* Reasonable defaults, I hope, based on tcp's defaults */
+int sysctl_decnet_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 };
+int sysctl_decnet_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 };
+int sysctl_decnet_rmem[3] = { 4 * 1024, 87380, 87380 * 2 };
+
 #ifdef CONFIG_SYSCTL
 extern int decnet_dst_gc_interval;
 static int min_decnet_time_wait[] = { 5 };
@@ -428,6 +434,33 @@ static ctl_table dn_table[] = {
 		.extra1 = &min_decnet_no_fc_max_cwnd,
 		.extra2 = &max_decnet_no_fc_max_cwnd
 	},
+	{
+		.ctl_name = NET_DECNET_MEM,
+		.procname = "decnet_mem",
+		.data = &sysctl_decnet_mem,
+		.maxlen = sizeof(sysctl_decnet_mem),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
+	{
+		.ctl_name = NET_DECNET_RMEM,
+		.procname = "decnet_rmem",
+		.data = &sysctl_decnet_rmem,
+		.maxlen = sizeof(sysctl_decnet_rmem),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
+	{
+		.ctl_name = NET_DECNET_WMEM,
+		.procname = "decnet_wmem",
+		.data = &sysctl_decnet_wmem,
+		.maxlen = sizeof(sysctl_decnet_wmem),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	},
 	{
 		.ctl_name = NET_DECNET_DEBUG_LEVEL,
 		.procname = "debug",
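Note: the new triples follow the tcp_mem/tcp_rmem/tcp_wmem convention, roughly
{ min, default or pressure point, max }, and dn_alloc_sock() above seeds sk_sndbuf
and sk_rcvbuf from the middle element. A quick standalone reader, assuming the
/proc path implied by the ctl names:

#include <stdio.h>

int main(void)
{
	int v[3];
	FILE *f = fopen("/proc/sys/net/decnet/decnet_wmem", "r");

	if (!f) {
		perror("decnet_wmem");
		return 1;
	}
	if (fscanf(f, "%d %d %d", &v[0], &v[1], &v[2]) == 3)
		printf("wmem: min=%d default=%d max=%d\n", v[0], v[1], v[2]);
	fclose(f);
	return 0;
}
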
diff --git a/net/ieee80211/Kconfig b/net/ieee80211/Kconfig
index 91b16fbf91f0..d18ccba3ea9e 100644
--- a/net/ieee80211/Kconfig
+++ b/net/ieee80211/Kconfig
@@ -55,7 +55,7 @@ config IEEE80211_CRYPT_CCMP
 
 config IEEE80211_CRYPT_TKIP
 	tristate "IEEE 802.11i TKIP encryption"
-	depends on IEEE80211
+	depends on IEEE80211 && NET_RADIO
 	select CRYPTO
 	select CRYPTO_MICHAEL_MIC
 	---help---
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index a4c347c3b8e3..46f9d9cf7a5f 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -618,7 +618,7 @@ static int ipgre_rcv(struct sk_buff *skb)
 
 	skb->mac.raw = skb->nh.raw;
 	skb->nh.raw = __pskb_pull(skb, offset);
-	skb_postpull_rcsum(skb, skb->mac.raw, offset);
+	skb_postpull_rcsum(skb, skb->h.raw, offset);
 	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
 	skb->pkt_type = PACKET_HOST;
 #ifdef CONFIG_NET_IPGRE_BROADCAST
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 0bc00528d888..88a60650e6b8 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -56,8 +56,8 @@ config IP_NF_CONNTRACK_MARK
 	  instead of the individual packets.
 
 config IP_NF_CONNTRACK_EVENTS
-	bool "Connection tracking events"
-	depends on IP_NF_CONNTRACK
+	bool "Connection tracking events (EXPERIMENTAL)"
+	depends on EXPERIMENTAL && IP_NF_CONNTRACK
 	help
 	  If this option is enabled, the connection tracking code will
 	  provide a notifier chain that can be used by other kernel code
@@ -66,8 +66,8 @@ config IP_NF_CONNTRACK_EVENTS
 	  IF unsure, say `N'.
 
 config IP_NF_CONNTRACK_NETLINK
-	tristate 'Connection tracking netlink interface'
-	depends on IP_NF_CONNTRACK && NETFILTER_NETLINK
+	tristate 'Connection tracking netlink interface (EXPERIMENTAL)'
+	depends on EXPERIMENTAL && IP_NF_CONNTRACK && NETFILTER_NETLINK
 	depends on IP_NF_CONNTRACK!=y || NETFILTER_NETLINK!=m
 	help
 	  This option enables support for a netlink-based userspace interface
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 058c48e258fc..d0a447e520a2 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -12,6 +12,7 @@ ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o
 
 # connection tracking
 obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
+obj-$(CONFIG_IP_NF_NAT) += ip_nat.o
 
 # conntrack netlink interface
 obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o
@@ -41,7 +42,7 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
 # the three instances of ip_tables
 obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
 obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
-obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o ip_nat.o
+obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o
 obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
 
 # matches
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 7a4ecddd597b..84c66dbfedaf 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -1345,6 +1345,11 @@ static int kill_all(struct ip_conntrack *i, void *data)
 	return 1;
 }
 
+void ip_conntrack_flush(void)
+{
+	ip_ct_iterate_cleanup(kill_all, NULL);
+}
+
 static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size)
 {
 	if (vmalloced)
@@ -1354,8 +1359,12 @@ static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size)
 			   get_order(sizeof(struct list_head) * size));
 }
 
-void ip_conntrack_flush(void)
+/* Mishearing the voices in his head, our hero wonders how he's
+   supposed to kill the mall. */
+void ip_conntrack_cleanup(void)
 {
+	ip_ct_attach = NULL;
+
 	/* This makes sure all current packets have passed through
 	   netfilter framework. Roll on, two-stage module
 	   delete... */
@@ -1363,7 +1372,7 @@ void ip_conntrack_flush(void)
 
 	ip_ct_event_cache_flush();
  i_see_dead_people:
-	ip_ct_iterate_cleanup(kill_all, NULL);
+	ip_conntrack_flush();
 	if (atomic_read(&ip_conntrack_count) != 0) {
 		schedule();
 		goto i_see_dead_people;
@@ -1371,14 +1380,7 @@ void ip_conntrack_flush(void)
 	/* wait until all references to ip_conntrack_untracked are dropped */
 	while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
 		schedule();
-}
 
-/* Mishearing the voices in his head, our hero wonders how he's
-   supposed to kill the mall. */
-void ip_conntrack_cleanup(void)
-{
-	ip_ct_attach = NULL;
-	ip_conntrack_flush();
 	kmem_cache_destroy(ip_conntrack_cachep);
 	kmem_cache_destroy(ip_conntrack_expect_cachep);
 	free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 3fce91bcc0ba..91fe8f2e38ff 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -503,7 +503,7 @@ ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple)
 }
 
 static const size_t cta_min_proto[CTA_PROTO_MAX] = {
-	[CTA_PROTO_NUM-1]	= sizeof(u_int16_t),
+	[CTA_PROTO_NUM-1]	= sizeof(u_int8_t),
 	[CTA_PROTO_SRC_PORT-1]	= sizeof(u_int16_t),
 	[CTA_PROTO_DST_PORT-1]	= sizeof(u_int16_t),
 	[CTA_PROTO_ICMP_TYPE-1]	= sizeof(u_int8_t),
@@ -528,7 +528,7 @@ ctnetlink_parse_tuple_proto(struct nfattr *attr,
 
 	if (!tb[CTA_PROTO_NUM-1])
 		return -EINVAL;
-	tuple->dst.protonum = *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]);
+	tuple->dst.protonum = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]);
 
 	proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
 
@@ -728,11 +728,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 			return -ENOENT;
 		}
 	}
-	if (del_timer(&ct->timeout)) {
-		ip_conntrack_put(ct);
+	if (del_timer(&ct->timeout))
 		ct->timeout.function((unsigned long)ct);
-		return 0;
-	}
+
 	ip_conntrack_put(ct);
 	DEBUGP("leaving\n");
 
@@ -877,7 +875,7 @@ ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[])
 	DEBUGP("NAT status: %lu\n",
 	       status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK));
 
-	if (ip_nat_initialized(ct, hooknum))
+	if (ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
 		return -EEXIST;
 	ip_nat_setup_info(ct, &range, hooknum);
 
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index aeb7353d4777..e7fa29e576dc 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -341,9 +341,10 @@ static int tcp_print_conntrack(struct seq_file *s,
 static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
 			 const struct ip_conntrack *ct)
 {
-	struct nfattr *nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
+	struct nfattr *nest_parms;
 
 	read_lock_bh(&tcp_lock);
+	nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
 	NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
 		&ct->proto.tcp.state);
 	read_unlock_bh(&tcp_lock);
diff --git a/net/ipv4/netfilter/ip_nat_tftp.c b/net/ipv4/netfilter/ip_nat_tftp.c
index 2215317c76b7..43c3bd7c118e 100644
--- a/net/ipv4/netfilter/ip_nat_tftp.c
+++ b/net/ipv4/netfilter/ip_nat_tftp.c
@@ -42,7 +42,10 @@ static unsigned int help(struct sk_buff **pskb,
 			 enum ip_conntrack_info ctinfo,
 			 struct ip_conntrack_expect *exp)
 {
-	exp->saved_proto.udp.port = exp->tuple.dst.u.tcp.port;
+	struct ip_conntrack *ct = exp->master;
+
+	exp->saved_proto.udp.port
+		= ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
 	exp->dir = IP_CT_DIR_REPLY;
 	exp->expectfn = ip_nat_follow_master;
 	if (ip_conntrack_expect_related(exp) != 0)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 029c70dfb585..b7325e0b406a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -262,122 +262,139 @@ static __inline__ u16 tcp_select_window(struct sock *sk)
  * We are working here with either a clone of the original
  * SKB, or a fresh unique copy made by the retransmit engine.
  */
-static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
+static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, gfp_t gfp_mask)
 {
-	if (skb != NULL) {
-		const struct inet_connection_sock *icsk = inet_csk(sk);
-		struct inet_sock *inet = inet_sk(sk);
-		struct tcp_sock *tp = tcp_sk(sk);
-		struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
-		int tcp_header_size = tp->tcp_header_len;
-		struct tcphdr *th;
-		int sysctl_flags;
-		int err;
+	const struct inet_connection_sock *icsk = inet_csk(sk);
+	struct inet_sock *inet;
+	struct tcp_sock *tp;
+	struct tcp_skb_cb *tcb;
+	int tcp_header_size;
+	struct tcphdr *th;
+	int sysctl_flags;
+	int err;
+
+	BUG_ON(!skb || !tcp_skb_pcount(skb));
+
+	/* If congestion control is doing timestamping, we must
+	 * take such a timestamp before we potentially clone/copy.
+	 */
+	if (icsk->icsk_ca_ops->rtt_sample)
+		__net_timestamp(skb);
+
+	if (likely(clone_it)) {
+		if (unlikely(skb_cloned(skb)))
+			skb = pskb_copy(skb, gfp_mask);
+		else
+			skb = skb_clone(skb, gfp_mask);
+		if (unlikely(!skb))
+			return -ENOBUFS;
+	}
 
-		BUG_ON(!tcp_skb_pcount(skb));
+	inet = inet_sk(sk);
+	tp = tcp_sk(sk);
+	tcb = TCP_SKB_CB(skb);
+	tcp_header_size = tp->tcp_header_len;
 
 #define SYSCTL_FLAG_TSTAMPS 0x1
 #define SYSCTL_FLAG_WSCALE 0x2
 #define SYSCTL_FLAG_SACK 0x4
 
-		/* If congestion control is doing timestamping */
-		if (icsk->icsk_ca_ops->rtt_sample)
-			__net_timestamp(skb);
-
-		sysctl_flags = 0;
-		if (tcb->flags & TCPCB_FLAG_SYN) {
-			tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
-			if(sysctl_tcp_timestamps) {
-				tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
-				sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
-			}
-			if(sysctl_tcp_window_scaling) {
-				tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
-				sysctl_flags |= SYSCTL_FLAG_WSCALE;
-			}
-			if(sysctl_tcp_sack) {
-				sysctl_flags |= SYSCTL_FLAG_SACK;
-				if(!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
-					tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
-			}
-		} else if (tp->rx_opt.eff_sacks) {
-			/* A SACK is 2 pad bytes, a 2 byte header, plus
-			 * 2 32-bit sequence numbers for each SACK block.
-			 */
-			tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
-					    (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK));
+	sysctl_flags = 0;
+	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
+		tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
+		if(sysctl_tcp_timestamps) {
+			tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
+			sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
 		}
-
-		if (tcp_packets_in_flight(tp) == 0)
-			tcp_ca_event(sk, CA_EVENT_TX_START);
-
-		th = (struct tcphdr *) skb_push(skb, tcp_header_size);
-		skb->h.th = th;
-		skb_set_owner_w(skb, sk);
-
-		/* Build TCP header and checksum it. */
-		th->source = inet->sport;
-		th->dest = inet->dport;
-		th->seq = htonl(tcb->seq);
-		th->ack_seq = htonl(tp->rcv_nxt);
-		*(((__u16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | tcb->flags);
-		if (tcb->flags & TCPCB_FLAG_SYN) {
-			/* RFC1323: The window in SYN & SYN/ACK segments
-			 * is never scaled.
-			 */
-			th->window = htons(tp->rcv_wnd);
-		} else {
-			th->window = htons(tcp_select_window(sk));
+		if (sysctl_tcp_window_scaling) {
+			tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
+			sysctl_flags |= SYSCTL_FLAG_WSCALE;
 		}
-		th->check = 0;
-		th->urg_ptr = 0;
-
-		if (tp->urg_mode &&
-		    between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF)) {
-			th->urg_ptr = htons(tp->snd_up-tcb->seq);
-			th->urg = 1;
+		if (sysctl_tcp_sack) {
+			sysctl_flags |= SYSCTL_FLAG_SACK;
+			if (!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
+				tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
 		}
+	} else if (unlikely(tp->rx_opt.eff_sacks)) {
+		/* A SACK is 2 pad bytes, a 2 byte header, plus
+		 * 2 32-bit sequence numbers for each SACK block.
+		 */
+		tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
+				    (tp->rx_opt.eff_sacks *
+				     TCPOLEN_SACK_PERBLOCK));
+	}
+
+	if (tcp_packets_in_flight(tp) == 0)
+		tcp_ca_event(sk, CA_EVENT_TX_START);
+
+	th = (struct tcphdr *) skb_push(skb, tcp_header_size);
+	skb->h.th = th;
+	skb_set_owner_w(skb, sk);
+
+	/* Build TCP header and checksum it. */
+	th->source = inet->sport;
+	th->dest = inet->dport;
+	th->seq = htonl(tcb->seq);
+	th->ack_seq = htonl(tp->rcv_nxt);
+	*(((__u16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
+				     tcb->flags);
+
+	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
+		/* RFC1323: The window in SYN & SYN/ACK segments
+		 * is never scaled.
+		 */
+		th->window = htons(tp->rcv_wnd);
+	} else {
+		th->window = htons(tcp_select_window(sk));
+	}
+	th->check = 0;
+	th->urg_ptr = 0;
 
-		if (tcb->flags & TCPCB_FLAG_SYN) {
-			tcp_syn_build_options((__u32 *)(th + 1),
-					      tcp_advertise_mss(sk),
-					      (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
-					      (sysctl_flags & SYSCTL_FLAG_SACK),
-					      (sysctl_flags & SYSCTL_FLAG_WSCALE),
-					      tp->rx_opt.rcv_wscale,
-					      tcb->when,
-					      tp->rx_opt.ts_recent);
-		} else {
-			tcp_build_and_update_options((__u32 *)(th + 1),
-						     tp, tcb->when);
+	if (unlikely(tp->urg_mode &&
+		     between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF))) {
+		th->urg_ptr = htons(tp->snd_up-tcb->seq);
+		th->urg = 1;
+	}
 
-			TCP_ECN_send(sk, tp, skb, tcp_header_size);
-		}
-		tp->af_specific->send_check(sk, th, skb->len, skb);
+	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
+		tcp_syn_build_options((__u32 *)(th + 1),
+				      tcp_advertise_mss(sk),
+				      (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
+				      (sysctl_flags & SYSCTL_FLAG_SACK),
+				      (sysctl_flags & SYSCTL_FLAG_WSCALE),
+				      tp->rx_opt.rcv_wscale,
+				      tcb->when,
+				      tp->rx_opt.ts_recent);
+	} else {
+		tcp_build_and_update_options((__u32 *)(th + 1),
+					     tp, tcb->when);
+		TCP_ECN_send(sk, tp, skb, tcp_header_size);
+	}
 
-		if (tcb->flags & TCPCB_FLAG_ACK)
-			tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
+	tp->af_specific->send_check(sk, th, skb->len, skb);
 
-		if (skb->len != tcp_header_size)
-			tcp_event_data_sent(tp, skb, sk);
+	if (likely(tcb->flags & TCPCB_FLAG_ACK))
+		tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
 
-		TCP_INC_STATS(TCP_MIB_OUTSEGS);
+	if (skb->len != tcp_header_size)
+		tcp_event_data_sent(tp, skb, sk);
 
-		err = tp->af_specific->queue_xmit(skb, 0);
-		if (err <= 0)
-			return err;
+	TCP_INC_STATS(TCP_MIB_OUTSEGS);
 
-		tcp_enter_cwr(sk);
+	err = tp->af_specific->queue_xmit(skb, 0);
+	if (unlikely(err <= 0))
+		return err;
+
+	tcp_enter_cwr(sk);
+
+	/* NET_XMIT_CN is special. It does not guarantee,
+	 * that this packet is lost. It tells that device
+	 * is about to start to drop packets or already
+	 * drops some packets of the same priority and
+	 * invokes us to send less aggressively.
+	 */
+	return err == NET_XMIT_CN ? 0 : err;
 
-		/* NET_XMIT_CN is special. It does not guarantee,
-		 * that this packet is lost. It tells that device
-		 * is about to start to drop packets or already
-		 * drops some packets of the same priority and
-		 * invokes us to send less aggressively.
-		 */
-		return err == NET_XMIT_CN ? 0 : err;
-	}
-	return -ENOBUFS;
 #undef SYSCTL_FLAG_TSTAMPS
 #undef SYSCTL_FLAG_WSCALE
 #undef SYSCTL_FLAG_SACK
@@ -1036,7 +1053,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 
 		TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
-		if (unlikely(tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC))))
+		if (unlikely(tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC)))
 			break;
 
 		/* Advance the send_head. This one is sent out.
@@ -1109,7 +1126,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
 	/* Send it out now. */
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
-	if (likely(!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation)))) {
+	if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) {
 		update_send_head(sk, tp, skb);
 		tcp_cwnd_validate(sk, tp);
 		return;
@@ -1429,9 +1446,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	 */
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
-	err = tcp_transmit_skb(sk, (skb_cloned(skb) ?
-				    pskb_copy(skb, GFP_ATOMIC):
-				    skb_clone(skb, GFP_ATOMIC)));
+	err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 
 	if (err == 0) {
 		/* Update global TCP statistics. */
@@ -1665,7 +1680,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 	TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp);
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
-	if (tcp_transmit_skb(sk, skb))
+	if (tcp_transmit_skb(sk, skb, 0, priority))
 		NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
 }
1671 1686
@@ -1700,7 +1715,7 @@ int tcp_send_synack(struct sock *sk)
1700 TCP_ECN_send_synack(tcp_sk(sk), skb); 1715 TCP_ECN_send_synack(tcp_sk(sk), skb);
1701 } 1716 }
1702 TCP_SKB_CB(skb)->when = tcp_time_stamp; 1717 TCP_SKB_CB(skb)->when = tcp_time_stamp;
1703 return tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)); 1718 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
1704} 1719}
1705 1720
1706/* 1721/*
@@ -1861,7 +1876,7 @@ int tcp_connect(struct sock *sk)
 	__skb_queue_tail(&sk->sk_write_queue, buff);
 	sk_charge_skb(sk, buff);
 	tp->packets_out += tcp_skb_pcount(buff);
-	tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL));
+	tcp_transmit_skb(sk, buff, 1, GFP_KERNEL);
 	TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);
 
 	/* Timer for repeating the SYN until an answer. */
@@ -1957,7 +1972,7 @@ void tcp_send_ack(struct sock *sk)
 		/* Send it off, this clears delayed acks for us. */
 		TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp);
 		TCP_SKB_CB(buff)->when = tcp_time_stamp;
-		tcp_transmit_skb(sk, buff);
+		tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
 	}
 }
 
@@ -1997,7 +2012,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
 	TCP_SKB_CB(skb)->seq = urgent ? tp->snd_una : tp->snd_una - 1;
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
-	return tcp_transmit_skb(sk, skb);
+	return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
 }
 
 int tcp_write_wakeup(struct sock *sk)
@@ -2030,7 +2045,7 @@ int tcp_write_wakeup(struct sock *sk)
 
 		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
 		TCP_SKB_CB(skb)->when = tcp_time_stamp;
-		err = tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
+		err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 		if (!err) {
 			update_send_head(sk, tp, skb);
 		}
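Note: the net effect of the tcp_output.c changes is a calling-convention switch:
the clone or copy that every caller used to perform moved inside tcp_transmit_skb(),
which now takes a clone_it flag plus an allocation mask and reports a failed clone
as -ENOBUFS from one place. A compilable toy model of that pattern (plain userspace
C, all names invented; this is not kernel code):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct buf { char data[64]; };

static struct buf *buf_clone(const struct buf *b)
{
	struct buf *c = malloc(sizeof(*c));

	if (c)
		memcpy(c, b, sizeof(*c));
	return c;
}

/* After the patch: the transmit routine clones internally when asked,
 * and a failed clone surfaces as an error from one place (the kernel
 * returns -ENOBUFS there). */
static int xmit(struct buf *b, int clone_it)
{
	if (clone_it) {
		b = buf_clone(b);
		if (!b)
			return -1;
	}
	printf("sent: %s\n", b->data);
	if (clone_it)
		free(b);
	return 0;
}

int main(void)
{
	struct buf b;

	snprintf(b.data, sizeof(b.data), "segment kept for retransmit");
	xmit(&b, 1);	/* caller still owns b, e.g. on the write queue */
	snprintf(b.data, sizeof(b.data), "one-shot probe/reset segment");
	xmit(&b, 0);	/* caller hands b over, no private copy needed */
	return 0;
}
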
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index b7d296a8ac6d..13e7e6e8df16 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -215,14 +215,6 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
 		vegas->beg_snd_nxt  = tp->snd_nxt;
 		vegas->beg_snd_cwnd = tp->snd_cwnd;
 
-		/* Take into account the current RTT sample too, to
-		 * decrease the impact of delayed acks. This double counts
-		 * this sample since we count it for the next window as well,
-		 * but that's not too awful, since we're taking the min,
-		 * rather than averaging.
-		 */
-		tcp_vegas_rtt_calc(sk, seq_rtt * 1000);
-
 		/* We do the Vegas calculations only if we got enough RTT
 		 * samples that we can be reasonably sure that we got
 		 * at least one RTT sample that wasn't from a delayed ACK.
@@ -333,11 +325,11 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
 			else if (tp->snd_cwnd > tp->snd_cwnd_clamp)
 				tp->snd_cwnd = tp->snd_cwnd_clamp;
 		}
-	}
 
 		/* Wipe the slate clean for the next RTT. */
 		vegas->cntRTT = 0;
 		vegas->minRTT = 0x7fffffff;
+	}
 }
 
 /* Extract info for Tcp socket info provided via netlink. */
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index b2b60f3e9cdd..42196ba3b0b9 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -182,6 +182,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl)
 	case IPPROTO_UDP:
 	case IPPROTO_TCP:
 	case IPPROTO_SCTP:
+	case IPPROTO_DCCP:
 		if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
 			u16 *ports = (u16 *)xprth;
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 76ff9f4fe89d..a60585fd85ad 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -137,6 +137,7 @@ static int addrconf_ifdown(struct net_device *dev, int how);
 static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags);
 static void addrconf_dad_timer(unsigned long data);
 static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
+static void addrconf_dad_run(struct inet6_dev *idev);
 static void addrconf_rs_timer(unsigned long data);
 static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
 static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
@@ -379,8 +380,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
 	    dev->type == ARPHRD_NONE ||
 	    dev->type == ARPHRD_SIT) {
 		printk(KERN_INFO
-		       "Disabled Privacy Extensions on device %p(%s)\n",
-		       dev, dev->name);
+		       "%s: Disabled Privacy Extensions\n",
+		       dev->name);
 		ndev->cnf.use_tempaddr = -1;
 	} else {
 		in6_dev_hold(ndev);
@@ -388,6 +389,9 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
 	}
 #endif
 
+	if (netif_carrier_ok(dev))
+		ndev->if_flags |= IF_READY;
+
 	write_lock_bh(&addrconf_lock);
 	dev->ip6_ptr = ndev;
 	write_unlock_bh(&addrconf_lock);
@@ -415,6 +419,7 @@ static struct inet6_dev * ipv6_find_idev(struct net_device *dev)
 		if ((idev = ipv6_add_dev(dev)) == NULL)
 			return NULL;
 	}
+
 	if (dev->flags&IFF_UP)
 		ipv6_mc_up(idev);
 	return idev;
@@ -634,8 +639,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 	}
 #endif
 
-	for (ifap = &idev->addr_list; (ifa=*ifap) != NULL;
-	     ifap = &ifa->if_next) {
+	for (ifap = &idev->addr_list; (ifa=*ifap) != NULL;) {
 		if (ifa == ifp) {
 			*ifap = ifa->if_next;
 			__in6_ifa_put(ifp);
@@ -643,6 +647,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 			if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0)
 				break;
 			deleted = 1;
+			continue;
 		} else if (ifp->flags & IFA_F_PERMANENT) {
 			if (ipv6_prefix_equal(&ifa->addr, &ifp->addr,
 					      ifp->prefix_len)) {
@@ -666,6 +671,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 				}
 			}
 		}
+		ifap = &ifa->if_next;
 	}
 	write_unlock_bh(&idev->lock);
 
@@ -903,11 +909,18 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
 
 			score.addr_type = __ipv6_addr_type(&ifa->addr);
 
-			/* Rule 0: Candidate Source Address (section 4)
+			/* Rule 0:
+			 *  - Tentative Address (RFC2462 section 5.4)
+			 *  - A tentative address is not considered
+			 *    "assigned to an interface" in the traditional
+			 *    sense.
+			 *  - Candidate Source Address (section 4)
 			 *  - In any case, anycast addresses, multicast
 			 *    addresses, and the unspecified address MUST
 			 *    NOT be included in a candidate set.
 			 */
+			if (ifa->flags & IFA_F_TENTATIVE)
+				continue;
 			if (unlikely(score.addr_type == IPV6_ADDR_ANY ||
 				     score.addr_type & IPV6_ADDR_MULTICAST)) {
 				LIMIT_NETDEBUG(KERN_DEBUG
@@ -1215,10 +1228,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
 
 /* Gets referenced address, destroys ifaddr */
 
-void addrconf_dad_failure(struct inet6_ifaddr *ifp)
+void addrconf_dad_stop(struct inet6_ifaddr *ifp)
 {
-	if (net_ratelimit())
-		printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name);
 	if (ifp->flags&IFA_F_PERMANENT) {
 		spin_lock_bh(&ifp->lock);
 		addrconf_del_timer(ifp);
@@ -1244,6 +1255,12 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
 	ipv6_del_addr(ifp);
 }
 
+void addrconf_dad_failure(struct inet6_ifaddr *ifp)
+{
+	if (net_ratelimit())
+		printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name);
+	addrconf_dad_stop(ifp);
+}
 
 /* Join to solicited addr multicast group. */
 
@@ -1596,9 +1613,17 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 		   not good.
 		 */
 		if (valid_lft >= 0x7FFFFFFF/HZ)
-			rt_expires = 0;
+			rt_expires = 0x7FFFFFFF - (0x7FFFFFFF % HZ);
 		else
-			rt_expires = jiffies + valid_lft * HZ;
+			rt_expires = valid_lft * HZ;
+
+		/*
+		 * We convert this (in jiffies) to clock_t later.
+		 * Avoid arithmetic overflow there as well.
+		 * Overflow can happen only if HZ < USER_HZ.
+		 */
+		if (HZ < USER_HZ && rt_expires > 0x7FFFFFFF / USER_HZ)
+			rt_expires = 0x7FFFFFFF / USER_HZ;
 
 		if (pinfo->onlink) {
 			struct rt6_info *rt;
@@ -1610,12 +1635,12 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 				ip6_del_rt(rt, NULL, NULL, NULL);
 				rt = NULL;
 			} else {
-				rt->rt6i_expires = rt_expires;
+				rt->rt6i_expires = jiffies + rt_expires;
 			}
 		}
 	} else if (valid_lft) {
 		addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
-				      dev, rt_expires, RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT);
+				      dev, jiffies_to_clock_t(rt_expires), RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT);
 	}
 	if (rt)
 		dst_release(&rt->u.dst);
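Note: the two prefix-lifetime hunks above clamp rt_expires twice: first so that
valid_lft * HZ cannot overflow a 32-bit long, then so that the later
jiffies_to_clock_t() conversion, which multiplies by USER_HZ/HZ when HZ < USER_HZ,
cannot overflow either. A standalone check of that arithmetic, with HZ and USER_HZ
chosen here purely to force the second clamp (values assumed for illustration):

#include <stdio.h>

#define HZ      100		/* assumed kernel tick rate */
#define USER_HZ 1000		/* assumed userspace clock_t rate */

int main(void)
{
	unsigned long valid_lft = 0x7FFFFFFF;	/* "infinite" lifetime, in seconds */
	unsigned long rt_expires;

	/* Clamp 1: seconds -> jiffies without overflowing 31 bits. */
	if (valid_lft >= 0x7FFFFFFF / HZ)
		rt_expires = 0x7FFFFFFF - (0x7FFFFFFF % HZ);
	else
		rt_expires = valid_lft * HZ;

	/* Clamp 2: jiffies_to_clock_t() multiplies by USER_HZ/HZ when
	 * HZ < USER_HZ, so cap what will be fed into it. */
	if (HZ < USER_HZ && rt_expires > 0x7FFFFFFF / USER_HZ)
		rt_expires = 0x7FFFFFFF / USER_HZ;

	printf("rt_expires = %lu jiffies (~%lu s)\n",
	       rt_expires, rt_expires / HZ);
	printf("as clock_t: %lu (<= 0x7FFFFFFF: %s)\n",
	       rt_expires * (USER_HZ / HZ),
	       rt_expires * (USER_HZ / HZ) <= 0x7FFFFFFF ? "yes" : "no");
	return 0;
}
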
@@ -2125,9 +2150,42 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 {
 	struct net_device *dev = (struct net_device *) data;
 	struct inet6_dev *idev = __in6_dev_get(dev);
+	int run_pending = 0;
 
 	switch(event) {
 	case NETDEV_UP:
+	case NETDEV_CHANGE:
+		if (event == NETDEV_UP) {
+			if (!netif_carrier_ok(dev)) {
+				/* device is not ready yet. */
+				printk(KERN_INFO
+					"ADDRCONF(NETDEV_UP): %s: "
+					"link is not ready\n",
+					dev->name);
+				break;
+			}
+		} else {
+			if (!netif_carrier_ok(dev)) {
+				/* device is still not ready. */
+				break;
+			}
+
+			if (idev) {
+				if (idev->if_flags & IF_READY) {
+					/* device is already configured. */
+					break;
+				}
+				idev->if_flags |= IF_READY;
+			}
+
+			printk(KERN_INFO
+				"ADDRCONF(NETDEV_CHANGE): %s: "
+				"link becomes ready\n",
+				dev->name);
+
+			run_pending = 1;
+		}
+
 		switch(dev->type) {
 		case ARPHRD_SIT:
 			addrconf_sit_config(dev);
@@ -2144,6 +2202,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 			break;
 		};
 		if (idev) {
+			if (run_pending)
+				addrconf_dad_run(idev);
+
 			/* If the MTU changed during the interface down, when the
 			   interface up, the changed MTU must be reflected in the
 			   idev as well as routers.
@@ -2178,8 +2239,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 		 */
 		addrconf_ifdown(dev, event != NETDEV_DOWN);
 		break;
-	case NETDEV_CHANGE:
-		break;
+
 	case NETDEV_CHANGENAME:
 #ifdef CONFIG_SYSCTL
 		if (idev) {
@@ -2260,7 +2320,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 
 	/* Step 3: clear flags for stateless addrconf */
 	if (how != 1)
-		idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD);
+		idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
 
 	/* Step 4: clear address list */
 #ifdef CONFIG_IPV6_PRIVACY
@@ -2369,11 +2429,20 @@ out:
 /*
  *	Duplicate Address Detection
  */
+static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
+{
+	unsigned long rand_num;
+	struct inet6_dev *idev = ifp->idev;
+
+	rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1);
+	ifp->probes = idev->cnf.dad_transmits;
+	addrconf_mod_timer(ifp, AC_DAD, rand_num);
+}
+
 static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
 {
 	struct inet6_dev *idev = ifp->idev;
 	struct net_device *dev = idev->dev;
-	unsigned long rand_num;
 
 	addrconf_join_solict(dev, &ifp->addr);
 
@@ -2382,7 +2451,6 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
 					flags);
 
 	net_srandom(ifp->addr.s6_addr32[3]);
-	rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1);
 
 	read_lock_bh(&idev->lock);
 	if (ifp->dead)
@@ -2399,9 +2467,19 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
 		return;
 	}
 
-	ifp->probes = idev->cnf.dad_transmits;
-	addrconf_mod_timer(ifp, AC_DAD, rand_num);
-
+	if (!(idev->if_flags & IF_READY)) {
+		spin_unlock_bh(&ifp->lock);
+		read_unlock_bh(&idev->lock);
+		/*
+		 * If the device is not ready:
+		 * - keep it tentative if it is a permanent address.
+		 * - otherwise, kill it.
+		 */
+		in6_ifa_hold(ifp);
+		addrconf_dad_stop(ifp);
+		return;
+	}
+	addrconf_dad_kick(ifp);
 	spin_unlock_bh(&ifp->lock);
 out:
 	read_unlock_bh(&idev->lock);
@@ -2484,6 +2562,22 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
 	}
 }
 
+static void addrconf_dad_run(struct inet6_dev *idev) {
+	struct inet6_ifaddr *ifp;
+
+	read_lock_bh(&idev->lock);
+	for (ifp = idev->addr_list; ifp; ifp = ifp->if_next) {
+		spin_lock_bh(&ifp->lock);
+		if (!(ifp->flags & IFA_F_TENTATIVE)) {
+			spin_unlock_bh(&ifp->lock);
+			continue;
+		}
+		spin_unlock_bh(&ifp->lock);
+		addrconf_dad_kick(ifp);
+	}
+	read_unlock_bh(&idev->lock);
+}
+
 #ifdef CONFIG_PROC_FS
 struct if6_iter_state {
 	int bucket;
@@ -2689,6 +2783,9 @@ restart:
 			in6_ifa_hold(ifpub);
 			spin_unlock(&ifp->lock);
 			read_unlock(&addrconf_hash_lock);
+			spin_lock(&ifpub->lock);
+			ifpub->regen_count = 0;
+			spin_unlock(&ifpub->lock);
 			ipv6_create_tempaddr(ifpub, ifp);
 			in6_ifa_put(ifpub);
 			in6_ifa_put(ifp);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 40d9a1935ab5..8bfbe9970793 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -248,7 +248,7 @@ static u32 esp6_get_max_size(struct xfrm_state *x, int mtu)
 	if (esp->conf.padlen)
 		mtu = ALIGN(mtu, esp->conf.padlen);
 
-	return mtu + x->props.header_len + esp->auth.icv_full_len;
+	return mtu + x->props.header_len + esp->auth.icv_trunc_len;
 }
 
 static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 34a332225c17..6ec6a2b549bb 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -328,8 +328,10 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 	iif = skb->dev->ifindex;
 
 	/*
-	 *	Must not send if we know that source is Anycast also.
-	 *	for now we don't know that.
+	 *	Must not send error if the source does not uniquely
+	 *	identify a single node (RFC2463 Section 2.4).
+	 *	We check unspecified / multicast addresses here,
+	 *	and anycast addresses will be checked later.
 	 */
 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
 		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
@@ -373,6 +375,16 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 	err = ip6_dst_lookup(sk, &dst, &fl);
 	if (err)
 		goto out;
+
+	/*
+	 *	We won't send icmp if the destination is known
+	 *	anycast.
+	 */
+	if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
+		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
+		goto out_dst_release;
+	}
+
 	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
 		goto out;
 
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index fd939da090c4..f829a4ad3ccc 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -170,7 +170,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
 #define MLDV2_QQIC(value) MLDV2_EXP(0x80, 4, 3, value)
 #define MLDV2_MRC(value) MLDV2_EXP(0x8000, 12, 3, value)
 
-#define IPV6_MLD_MAX_MSF	10
+#define IPV6_MLD_MAX_MSF	64
 
 int sysctl_mld_max_msf = IPV6_MLD_MAX_MSF;
 
@@ -224,6 +224,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr)
 
 	mc_lst->ifindex = dev->ifindex;
 	mc_lst->sfmode = MCAST_EXCLUDE;
+	mc_lst->sflock = RW_LOCK_UNLOCKED;
 	mc_lst->sflist = NULL;
 
 	/*
@@ -360,6 +361,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 	struct ip6_sf_socklist *psl;
 	int i, j, rv;
 	int leavegroup = 0;
+	int pmclocked = 0;
 	int err;
 
 	if (pgsr->gsr_group.ss_family != AF_INET6 ||
@@ -403,6 +405,9 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 		pmc->sfmode = omode;
 	}
 
+	write_lock_bh(&pmc->sflock);
+	pmclocked = 1;
+
 	psl = pmc->sflist;
 	if (!add) {
 		if (!psl)
@@ -475,6 +480,8 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 	/* update the interface list */
 	ip6_mc_add_src(idev, group, omode, 1, source, 1);
 done:
+	if (pmclocked)
+		write_unlock_bh(&pmc->sflock);
 	read_unlock_bh(&ipv6_sk_mc_lock);
 	read_unlock_bh(&idev->lock);
 	in6_dev_put(idev);
@@ -510,6 +517,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
510 dev = idev->dev; 517 dev = idev->dev;
511 518
512 err = 0; 519 err = 0;
520 read_lock_bh(&ipv6_sk_mc_lock);
521
513 if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) { 522 if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) {
514 leavegroup = 1; 523 leavegroup = 1;
515 goto done; 524 goto done;
@@ -549,6 +558,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
549 newpsl = NULL; 558 newpsl = NULL;
550 (void) ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0); 559 (void) ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0);
551 } 560 }
561
562 write_lock_bh(&pmc->sflock);
552 psl = pmc->sflist; 563 psl = pmc->sflist;
553 if (psl) { 564 if (psl) {
554 (void) ip6_mc_del_src(idev, group, pmc->sfmode, 565 (void) ip6_mc_del_src(idev, group, pmc->sfmode,
@@ -558,8 +569,10 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
558 (void) ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0); 569 (void) ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
559 pmc->sflist = newpsl; 570 pmc->sflist = newpsl;
560 pmc->sfmode = gsf->gf_fmode; 571 pmc->sfmode = gsf->gf_fmode;
572 write_unlock_bh(&pmc->sflock);
561 err = 0; 573 err = 0;
562done: 574done:
575 read_unlock_bh(&ipv6_sk_mc_lock);
563 read_unlock_bh(&idev->lock); 576 read_unlock_bh(&idev->lock);
564 in6_dev_put(idev); 577 in6_dev_put(idev);
565 dev_put(dev); 578 dev_put(dev);
@@ -592,6 +605,11 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
592 dev = idev->dev; 605 dev = idev->dev;
593 606
594 err = -EADDRNOTAVAIL; 607 err = -EADDRNOTAVAIL;
608 /*
609 * changes to the ipv6_mc_list require the socket lock and
 610 * a read lock on ipv6_sk_mc_lock. We have the socket lock,
611 * so reading the list is safe.
612 */
595 613
596 for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { 614 for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
597 if (pmc->ifindex != gsf->gf_interface) 615 if (pmc->ifindex != gsf->gf_interface)
@@ -614,6 +632,10 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
614 copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) { 632 copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) {
615 return -EFAULT; 633 return -EFAULT;
616 } 634 }
 635 /* changes to psl require the socket lock, a read lock
 636 * on ipv6_sk_mc_lock and a write lock on pmc->sflock. We
637 * have the socket lock, so reading here is safe.
638 */
617 for (i=0; i<copycount; i++) { 639 for (i=0; i<copycount; i++) {
618 struct sockaddr_in6 *psin6; 640 struct sockaddr_in6 *psin6;
619 struct sockaddr_storage ss; 641 struct sockaddr_storage ss;
@@ -650,6 +672,7 @@ int inet6_mc_check(struct sock *sk, struct in6_addr *mc_addr,
650 read_unlock(&ipv6_sk_mc_lock); 672 read_unlock(&ipv6_sk_mc_lock);
651 return 1; 673 return 1;
652 } 674 }
675 read_lock(&mc->sflock);
653 psl = mc->sflist; 676 psl = mc->sflist;
654 if (!psl) { 677 if (!psl) {
655 rv = mc->sfmode == MCAST_EXCLUDE; 678 rv = mc->sfmode == MCAST_EXCLUDE;
@@ -665,6 +688,7 @@ int inet6_mc_check(struct sock *sk, struct in6_addr *mc_addr,
665 if (mc->sfmode == MCAST_EXCLUDE && i < psl->sl_count) 688 if (mc->sfmode == MCAST_EXCLUDE && i < psl->sl_count)
666 rv = 0; 689 rv = 0;
667 } 690 }
691 read_unlock(&mc->sflock);
668 read_unlock(&ipv6_sk_mc_lock); 692 read_unlock(&ipv6_sk_mc_lock);
669 693
670 return rv; 694 return rv;
@@ -1068,7 +1092,8 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime)
1068 ma->mca_flags |= MAF_TIMER_RUNNING; 1092 ma->mca_flags |= MAF_TIMER_RUNNING;
1069} 1093}
1070 1094
1071static void mld_marksources(struct ifmcaddr6 *pmc, int nsrcs, 1095/* mark EXCLUDE-mode sources */
1096static int mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs,
1072 struct in6_addr *srcs) 1097 struct in6_addr *srcs)
1073{ 1098{
1074 struct ip6_sf_list *psf; 1099 struct ip6_sf_list *psf;
@@ -1078,13 +1103,53 @@ static void mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
1078 for (psf=pmc->mca_sources; psf; psf=psf->sf_next) { 1103 for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
1079 if (scount == nsrcs) 1104 if (scount == nsrcs)
1080 break; 1105 break;
1081 for (i=0; i<nsrcs; i++) 1106 for (i=0; i<nsrcs; i++) {
1107 /* skip inactive filters */
1108 if (pmc->mca_sfcount[MCAST_INCLUDE] ||
1109 pmc->mca_sfcount[MCAST_EXCLUDE] !=
1110 psf->sf_count[MCAST_EXCLUDE])
1111 continue;
1112 if (ipv6_addr_equal(&srcs[i], &psf->sf_addr)) {
1113 scount++;
1114 break;
1115 }
1116 }
1117 }
1118 pmc->mca_flags &= ~MAF_GSQUERY;
1119 if (scount == nsrcs) /* all sources excluded */
1120 return 0;
1121 return 1;
1122}
1123
1124static int mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
1125 struct in6_addr *srcs)
1126{
1127 struct ip6_sf_list *psf;
1128 int i, scount;
1129
1130 if (pmc->mca_sfmode == MCAST_EXCLUDE)
1131 return mld_xmarksources(pmc, nsrcs, srcs);
1132
1133 /* mark INCLUDE-mode sources */
1134
1135 scount = 0;
1136 for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
1137 if (scount == nsrcs)
1138 break;
1139 for (i=0; i<nsrcs; i++) {
1082 if (ipv6_addr_equal(&srcs[i], &psf->sf_addr)) { 1140 if (ipv6_addr_equal(&srcs[i], &psf->sf_addr)) {
1083 psf->sf_gsresp = 1; 1141 psf->sf_gsresp = 1;
1084 scount++; 1142 scount++;
1085 break; 1143 break;
1086 } 1144 }
1145 }
1146 }
1147 if (!scount) {
1148 pmc->mca_flags &= ~MAF_GSQUERY;
1149 return 0;
1087 } 1150 }
1151 pmc->mca_flags |= MAF_GSQUERY;
1152 return 1;
1088} 1153}
1089 1154
1090int igmp6_event_query(struct sk_buff *skb) 1155int igmp6_event_query(struct sk_buff *skb)
@@ -1167,7 +1232,7 @@ int igmp6_event_query(struct sk_buff *skb)
1167 /* mark sources to include, if group & source-specific */ 1232 /* mark sources to include, if group & source-specific */
1168 if (mlh2->nsrcs != 0) { 1233 if (mlh2->nsrcs != 0) {
1169 if (!pskb_may_pull(skb, srcs_offset + 1234 if (!pskb_may_pull(skb, srcs_offset +
1170 mlh2->nsrcs * sizeof(struct in6_addr))) { 1235 ntohs(mlh2->nsrcs) * sizeof(struct in6_addr))) {
1171 in6_dev_put(idev); 1236 in6_dev_put(idev);
1172 return -EINVAL; 1237 return -EINVAL;
1173 } 1238 }
@@ -1203,10 +1268,9 @@ int igmp6_event_query(struct sk_buff *skb)
1203 else 1268 else
1204 ma->mca_flags &= ~MAF_GSQUERY; 1269 ma->mca_flags &= ~MAF_GSQUERY;
1205 } 1270 }
1206 if (ma->mca_flags & MAF_GSQUERY) 1271 if (!(ma->mca_flags & MAF_GSQUERY) ||
1207 mld_marksources(ma, ntohs(mlh2->nsrcs), 1272 mld_marksources(ma, ntohs(mlh2->nsrcs), mlh2->srcs))
1208 mlh2->srcs); 1273 igmp6_group_queried(ma, max_delay);
1209 igmp6_group_queried(ma, max_delay);
1210 spin_unlock_bh(&ma->mca_lock); 1274 spin_unlock_bh(&ma->mca_lock);
1211 if (group_type != IPV6_ADDR_ANY) 1275 if (group_type != IPV6_ADDR_ANY)
1212 break; 1276 break;
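The ntohs() added to the pskb_may_pull() length above matters because the MLDv2 query's nsrcs is a 16-bit big-endian wire field; using it raw under-computes the pull length on little-endian hosts. A minimal userspace illustration of the same arithmetic (16 being sizeof(struct in6_addr)):

#include <stdio.h>
#include <arpa/inet.h>

int main(void)
{
	unsigned short nsrcs = htons(3);  /* as the field sits in the packet */

	printf("raw  : %zu bytes\n", (size_t)nsrcs * 16);        /* 12288 on LE */
	printf("ntohs: %zu bytes\n", (size_t)ntohs(nsrcs) * 16); /* 48 */
	return 0;
}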
@@ -1281,7 +1345,18 @@ static int is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type,
1281 case MLD2_MODE_IS_EXCLUDE: 1345 case MLD2_MODE_IS_EXCLUDE:
1282 if (gdeleted || sdeleted) 1346 if (gdeleted || sdeleted)
1283 return 0; 1347 return 0;
1284 return !((pmc->mca_flags & MAF_GSQUERY) && !psf->sf_gsresp); 1348 if (!((pmc->mca_flags & MAF_GSQUERY) && !psf->sf_gsresp)) {
1349 if (pmc->mca_sfmode == MCAST_INCLUDE)
1350 return 1;
1351 /* don't include if this source is excluded
1352 * in all filters
1353 */
1354 if (psf->sf_count[MCAST_INCLUDE])
1355 return 0;
1356 return pmc->mca_sfcount[MCAST_EXCLUDE] ==
1357 psf->sf_count[MCAST_EXCLUDE];
1358 }
1359 return 0;
1285 case MLD2_CHANGE_TO_INCLUDE: 1360 case MLD2_CHANGE_TO_INCLUDE:
1286 if (gdeleted || sdeleted) 1361 if (gdeleted || sdeleted)
1287 return 0; 1362 return 0;
@@ -1450,7 +1525,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
1450 struct mld2_report *pmr; 1525 struct mld2_report *pmr;
1451 struct mld2_grec *pgr = NULL; 1526 struct mld2_grec *pgr = NULL;
1452 struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list; 1527 struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list;
1453 int scount, first, isquery, truncate; 1528 int scount, stotal, first, isquery, truncate;
1454 1529
1455 if (pmc->mca_flags & MAF_NOREPORT) 1530 if (pmc->mca_flags & MAF_NOREPORT)
1456 return skb; 1531 return skb;
@@ -1460,25 +1535,13 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
1460 truncate = type == MLD2_MODE_IS_EXCLUDE || 1535 truncate = type == MLD2_MODE_IS_EXCLUDE ||
1461 type == MLD2_CHANGE_TO_EXCLUDE; 1536 type == MLD2_CHANGE_TO_EXCLUDE;
1462 1537
1538 stotal = scount = 0;
1539
1463 psf_list = sdeleted ? &pmc->mca_tomb : &pmc->mca_sources; 1540 psf_list = sdeleted ? &pmc->mca_tomb : &pmc->mca_sources;
1464 1541
1465 if (!*psf_list) { 1542 if (!*psf_list)
1466 if (type == MLD2_ALLOW_NEW_SOURCES || 1543 goto empty_source;
1467 type == MLD2_BLOCK_OLD_SOURCES) 1544
1468 return skb;
1469 if (pmc->mca_crcount || isquery) {
1470 /* make sure we have room for group header and at
1471 * least one source.
1472 */
1473 if (skb && AVAILABLE(skb) < sizeof(struct mld2_grec)+
1474 sizeof(struct in6_addr)) {
1475 mld_sendpack(skb);
1476 skb = NULL; /* add_grhead will get a new one */
1477 }
1478 skb = add_grhead(skb, pmc, type, &pgr);
1479 }
1480 return skb;
1481 }
1482 pmr = skb ? (struct mld2_report *)skb->h.raw : NULL; 1545 pmr = skb ? (struct mld2_report *)skb->h.raw : NULL;
1483 1546
1484 /* EX and TO_EX get a fresh packet, if needed */ 1547 /* EX and TO_EX get a fresh packet, if needed */
@@ -1491,7 +1554,6 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
1491 } 1554 }
1492 } 1555 }
1493 first = 1; 1556 first = 1;
1494 scount = 0;
1495 psf_prev = NULL; 1557 psf_prev = NULL;
1496 for (psf=*psf_list; psf; psf=psf_next) { 1558 for (psf=*psf_list; psf; psf=psf_next) {
1497 struct in6_addr *psrc; 1559 struct in6_addr *psrc;
@@ -1525,7 +1587,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
1525 } 1587 }
1526 psrc = (struct in6_addr *)skb_put(skb, sizeof(*psrc)); 1588 psrc = (struct in6_addr *)skb_put(skb, sizeof(*psrc));
1527 *psrc = psf->sf_addr; 1589 *psrc = psf->sf_addr;
1528 scount++; 1590 scount++; stotal++;
1529 if ((type == MLD2_ALLOW_NEW_SOURCES || 1591 if ((type == MLD2_ALLOW_NEW_SOURCES ||
1530 type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount) { 1592 type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount) {
1531 psf->sf_crcount--; 1593 psf->sf_crcount--;
@@ -1540,6 +1602,21 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
1540 } 1602 }
1541 psf_prev = psf; 1603 psf_prev = psf;
1542 } 1604 }
1605
1606empty_source:
1607 if (!stotal) {
1608 if (type == MLD2_ALLOW_NEW_SOURCES ||
1609 type == MLD2_BLOCK_OLD_SOURCES)
1610 return skb;
1611 if (pmc->mca_crcount || isquery) {
1612 /* make sure we have room for group header */
1613 if (skb && AVAILABLE(skb) < sizeof(struct mld2_grec)) {
1614 mld_sendpack(skb);
1615 skb = NULL; /* add_grhead will get a new one */
1616 }
1617 skb = add_grhead(skb, pmc, type, &pgr);
1618 }
1619 }
1543 if (pgr) 1620 if (pgr)
1544 pgr->grec_nsrcs = htons(scount); 1621 pgr->grec_nsrcs = htons(scount);
1545 1622
@@ -1621,11 +1698,11 @@ static void mld_send_cr(struct inet6_dev *idev)
1621 skb = add_grec(skb, pmc, dtype, 1, 1); 1698 skb = add_grec(skb, pmc, dtype, 1, 1);
1622 } 1699 }
1623 if (pmc->mca_crcount) { 1700 if (pmc->mca_crcount) {
1624 pmc->mca_crcount--;
1625 if (pmc->mca_sfmode == MCAST_EXCLUDE) { 1701 if (pmc->mca_sfmode == MCAST_EXCLUDE) {
1626 type = MLD2_CHANGE_TO_INCLUDE; 1702 type = MLD2_CHANGE_TO_INCLUDE;
1627 skb = add_grec(skb, pmc, type, 1, 0); 1703 skb = add_grec(skb, pmc, type, 1, 0);
1628 } 1704 }
1705 pmc->mca_crcount--;
1629 if (pmc->mca_crcount == 0) { 1706 if (pmc->mca_crcount == 0) {
1630 mld_clear_zeros(&pmc->mca_tomb); 1707 mld_clear_zeros(&pmc->mca_tomb);
1631 mld_clear_zeros(&pmc->mca_sources); 1708 mld_clear_zeros(&pmc->mca_sources);
@@ -1659,12 +1736,12 @@ static void mld_send_cr(struct inet6_dev *idev)
1659 1736
1660 /* filter mode changes */ 1737 /* filter mode changes */
1661 if (pmc->mca_crcount) { 1738 if (pmc->mca_crcount) {
1662 pmc->mca_crcount--;
1663 if (pmc->mca_sfmode == MCAST_EXCLUDE) 1739 if (pmc->mca_sfmode == MCAST_EXCLUDE)
1664 type = MLD2_CHANGE_TO_EXCLUDE; 1740 type = MLD2_CHANGE_TO_EXCLUDE;
1665 else 1741 else
1666 type = MLD2_CHANGE_TO_INCLUDE; 1742 type = MLD2_CHANGE_TO_INCLUDE;
1667 skb = add_grec(skb, pmc, type, 0, 0); 1743 skb = add_grec(skb, pmc, type, 0, 0);
1744 pmc->mca_crcount--;
1668 } 1745 }
1669 spin_unlock_bh(&pmc->mca_lock); 1746 spin_unlock_bh(&pmc->mca_lock);
1670 } 1747 }
@@ -2023,6 +2100,9 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
2023{ 2100{
2024 int err; 2101 int err;
2025 2102
2103 /* callers have the socket lock and a write lock on ipv6_sk_mc_lock,
2104 * so there are no other readers or writers of iml or its sflist
2105 */
2026 if (iml->sflist == 0) { 2106 if (iml->sflist == 0) {
2027 /* any-source empty exclude case */ 2107 /* any-source empty exclude case */
2028 return ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0); 2108 return ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0);
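The per-socket source lists that the new sflock protects are created from userspace through the RFC 3678 API, and their length is what sysctl_mld_max_msf bounds (now 64, previously 10). A small sketch of a source-specific join that exercises the ip6_mc_source() path above; interface name and addresses are examples only:

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <netinet/in.h>
#include <sys/socket.h>

int main(void)
{
	struct group_source_req gsr;
	struct sockaddr_in6 *grp = (struct sockaddr_in6 *)&gsr.gsr_group;
	struct sockaddr_in6 *src = (struct sockaddr_in6 *)&gsr.gsr_source;
	int fd = socket(AF_INET6, SOCK_DGRAM, 0);

	if (fd < 0) { perror("socket"); return 1; }

	memset(&gsr, 0, sizeof(gsr));
	gsr.gsr_interface = if_nametoindex("eth0");
	grp->sin6_family = AF_INET6;
	inet_pton(AF_INET6, "ff3e::1234", &grp->sin6_addr);
	src->sin6_family = AF_INET6;
	inet_pton(AF_INET6, "2001:db8::1", &src->sin6_addr);

	/* adds one entry to the socket's source filter list */
	if (setsockopt(fd, IPPROTO_IPV6, MCAST_JOIN_SOURCE_GROUP,
		       &gsr, sizeof(gsr)) < 0)
		perror("MCAST_JOIN_SOURCE_GROUP");
	return 0;
}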
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 060d61202412..04912f9b35c3 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -211,7 +211,7 @@ config IP6_NF_TARGET_REJECT
211 211
212config IP6_NF_TARGET_NFQUEUE 212config IP6_NF_TARGET_NFQUEUE
213 tristate "NFQUEUE Target Support" 213 tristate "NFQUEUE Target Support"
214 depends on IP_NF_IPTABLES 214 depends on IP6_NF_IPTABLES
215 help 215 help
216 This Target replaced the old obsolete QUEUE target. 216 This Target replaced the old obsolete QUEUE target.
217 217
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index c0f1da5497a9..a7e03cfacd06 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -68,8 +68,8 @@ static int icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
68 [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_REPLY +1 68 [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_REPLY +1
69 }; 69 };
70 70
71 __u8 type = orig->dst.u.icmp.type - 128; 71 int type = orig->dst.u.icmp.type - 128;
72 if (type >= sizeof(invmap) || !invmap[type]) 72 if (type < 0 || type >= sizeof(invmap) || !invmap[type])
73 return 0; 73 return 0;
74 74
75 tuple->src.u.icmp.id = orig->src.u.icmp.id; 75 tuple->src.u.icmp.id = orig->src.u.icmp.id;
@@ -129,12 +129,12 @@ static int icmpv6_new(struct nf_conn *conntrack,
129 [ICMPV6_ECHO_REQUEST - 128] = 1, 129 [ICMPV6_ECHO_REQUEST - 128] = 1,
130 [ICMPV6_NI_QUERY - 128] = 1 130 [ICMPV6_NI_QUERY - 128] = 1
131 }; 131 };
132 int type = conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128;
132 133
133 if (conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128 >= sizeof(valid_new) 134 if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) {
134 || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128]) {
135 /* Can't create a new ICMPv6 `conn' with this. */ 135 /* Can't create a new ICMPv6 `conn' with this. */
136 DEBUGP("icmp: can't create new conn with type %u\n", 136 DEBUGP("icmpv6: can't create new conn with type %u\n",
137 conntrack->tuplehash[0].tuple.dst.u.icmp.type); 137 type + 128);
138 NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple); 138 NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple);
139 return 0; 139 return 0;
140 } 140 }
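Both hunks above replace implicit arithmetic on the ICMPv6 type with an explicit int plus a type < 0 range check. The old comparisons only rejected out-of-range types via C's integer conversion rules; the fix states the intent directly. A standalone demonstration of the underlying pitfall, illustrative only:

#include <stdio.h>

int main(void)
{
	unsigned char invmap[12] = { 0 };
	int type = 3 - 128;  /* e.g. ICMPv6 type 3 reaching this code: -125 */

	/* int vs size_t: type converts to a huge unsigned value, so this
	 * happens to be true, but only by conversion (and it draws
	 * -Wsign-compare) */
	printf("implicit: %d\n", type >= sizeof(invmap));

	/* the explicit form the fix uses says what is meant */
	printf("explicit: %d\n", type < 0 || type >= (int)sizeof(invmap));
	return 0;
}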
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a7a537b50595..66140f13d119 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -413,11 +413,14 @@ static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
413 rt = ip6_rt_copy(ort); 413 rt = ip6_rt_copy(ort);
414 414
415 if (rt) { 415 if (rt) {
416 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); 416 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
417 417 if (rt->rt6i_dst.plen != 128 &&
418 if (!(rt->rt6i_flags&RTF_GATEWAY)) 418 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
419 rt->rt6i_flags |= RTF_ANYCAST;
419 ipv6_addr_copy(&rt->rt6i_gateway, daddr); 420 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
421 }
420 422
423 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
421 rt->rt6i_dst.plen = 128; 424 rt->rt6i_dst.plen = 128;
422 rt->rt6i_flags |= RTF_CACHE; 425 rt->rt6i_flags |= RTF_CACHE;
423 rt->u.dst.flags |= DST_HOST; 426 rt->u.dst.flags |= DST_HOST;
@@ -829,7 +832,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
829 } 832 }
830 833
831 rt->u.dst.obsolete = -1; 834 rt->u.dst.obsolete = -1;
832 rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info); 835 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
833 if (nlh && (r = NLMSG_DATA(nlh))) { 836 if (nlh && (r = NLMSG_DATA(nlh))) {
834 rt->rt6i_protocol = r->rtm_protocol; 837 rt->rt6i_protocol = r->rtm_protocol;
835 } else { 838 } else {
@@ -1413,7 +1416,9 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1413 rt->u.dst.obsolete = -1; 1416 rt->u.dst.obsolete = -1;
1414 1417
1415 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; 1418 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1416 if (!anycast) 1419 if (anycast)
1420 rt->rt6i_flags |= RTF_ANYCAST;
1421 else
1417 rt->rt6i_flags |= RTF_LOCAL; 1422 rt->rt6i_flags |= RTF_LOCAL;
1418 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); 1423 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1419 if (rt->rt6i_nexthop == NULL) { 1424 if (rt->rt6i_nexthop == NULL) {
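The rt6i_expires hunk above stores an absolute deadline, jiffies + clock_t_to_jiffies(...), where the old code stored the bare converted interval, so every such route looked expired almost immediately. A userspace analogue of the corrected pattern with a monotonic clock; time_after() here is a simplified stand-in for the kernel macro:

#include <stdio.h>
#include <time.h>

static long now_sec(void)
{
	struct timespec ts;
	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec;
}

#define time_after(a, b) ((long)(b) - (long)(a) < 0)

int main(void)
{
	long lifetime = 600;                  /* route valid for 600s */
	long expires  = now_sec() + lifetime; /* absolute deadline, not the
	                                       * raw interval */
	printf("expired: %d\n", time_after(now_sec(), expires)); /* 0 */
	return 0;
}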
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 62c0e5bd931c..8827389abaf7 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -992,13 +992,12 @@ static void tcp_v6_send_reset(struct sk_buff *skb)
992 /* sk = NULL, but it is safe for now. RST socket required. */ 992 /* sk = NULL, but it is safe for now. RST socket required. */
993 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { 993 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
994 994
995 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) 995 if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
996 ip6_xmit(NULL, buff, &fl, NULL, 0);
997 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
998 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
996 return; 999 return;
997 1000 }
998 ip6_xmit(NULL, buff, &fl, NULL, 0);
999 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1000 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
1001 return;
1002 } 1001 }
1003 1002
1004 kfree_skb(buff); 1003 kfree_skb(buff);
@@ -1057,11 +1056,11 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32
1057 fl.fl_ip_sport = t1->source; 1056 fl.fl_ip_sport = t1->source;
1058 1057
1059 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { 1058 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1060 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) 1059 if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
1060 ip6_xmit(NULL, buff, &fl, NULL, 0);
1061 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1061 return; 1062 return;
1062 ip6_xmit(NULL, buff, &fl, NULL, 0); 1063 }
1063 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1064 return;
1065 } 1064 }
1066 1065
1067 kfree_skb(buff); 1066 kfree_skb(buff);
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index cf1d91e74c82..69bd957380e7 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -214,6 +214,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
214 case IPPROTO_UDP: 214 case IPPROTO_UDP:
215 case IPPROTO_TCP: 215 case IPPROTO_TCP:
216 case IPPROTO_SCTP: 216 case IPPROTO_SCTP:
217 case IPPROTO_DCCP:
217 if (pskb_may_pull(skb, skb->nh.raw + offset + 4 - skb->data)) { 218 if (pskb_may_pull(skb, skb->nh.raw + offset + 4 - skb->data)) {
218 u16 *ports = (u16 *)exthdr; 219 u16 *ports = (u16 *)exthdr;
219 220
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index a84f9221e5f0..794c41d19b28 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -61,8 +61,8 @@ config NF_CONNTRACK_MARK
61 instead of the individual packets. 61 instead of the individual packets.
62 62
63config NF_CONNTRACK_EVENTS 63config NF_CONNTRACK_EVENTS
64 bool "Connection tracking events" 64 bool "Connection tracking events (EXPERIMENTAL)"
65 depends on NF_CONNTRACK 65 depends on EXPERIMENTAL && NF_CONNTRACK
66 help 66 help
67 If this option is enabled, the connection tracking code will 67 If this option is enabled, the connection tracking code will
68 provide a notifier chain that can be used by other kernel code 68 provide a notifier chain that can be used by other kernel code
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 1da678303d78..a7c7b490cf22 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1383,6 +1383,9 @@ void nf_conntrack_cleanup(void)
1383 schedule(); 1383 schedule();
1384 goto i_see_dead_people; 1384 goto i_see_dead_people;
1385 } 1385 }
1386 /* wait until all references to nf_conntrack_untracked are dropped */
1387 while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
1388 schedule();
1386 1389
1387 for (i = 0; i < NF_CT_F_NUM; i++) { 1390 for (i = 0; i < NF_CT_F_NUM; i++) {
1388 if (nf_ct_cache[i].use == 0) 1391 if (nf_ct_cache[i].use == 0)
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index a60c59b97631..95fdf04f1d88 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -162,7 +162,7 @@ nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys,
162 return -EINVAL; 162 return -EINVAL;
163 } 163 }
164 164
165 min_len = NLMSG_ALIGN(sizeof(struct nfgenmsg)); 165 min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
166 if (unlikely(nlh->nlmsg_len < min_len)) 166 if (unlikely(nlh->nlmsg_len < min_len))
167 return -EINVAL; 167 return -EINVAL;
168 168
@@ -236,8 +236,7 @@ static inline int nfnetlink_rcv_msg(struct sk_buff *skb,
236 } 236 }
237 237
238 /* All the messages must at least contain nfgenmsg */ 238 /* All the messages must at least contain nfgenmsg */
239 if (nlh->nlmsg_len < 239 if (nlh->nlmsg_len < NLMSG_SPACE(sizeof(struct nfgenmsg))) {
240 NLMSG_LENGTH(NLMSG_ALIGN(sizeof(struct nfgenmsg)))) {
241 DEBUGP("received message was too short\n"); 240 DEBUGP("received message was too short\n");
242 return 0; 241 return 0;
243 } 242 }
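The switch to NLMSG_SPACE() above matters because NLMSG_ALIGN(len) only rounds len up, while NLMSG_SPACE(len) expands to NLMSG_ALIGN(NLMSG_LENGTH(len)) and so also accounts for the 16-byte struct nlmsghdr; the old minimum length was missing the header entirely. A quick compile-and-run check using the exported uapi macros:

#include <stdio.h>
#include <linux/netlink.h>
#include <linux/netfilter/nfnetlink.h>

int main(void)
{
	printf("payload     : %zu\n", sizeof(struct nfgenmsg));          /* 4  */
	printf("NLMSG_ALIGN : %zu\n",
	       (size_t)NLMSG_ALIGN(sizeof(struct nfgenmsg)));            /* 4  */
	printf("NLMSG_SPACE : %zu\n",
	       (size_t)NLMSG_SPACE(sizeof(struct nfgenmsg)));            /* 20 */
	return 0;
}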
diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c
index 004e8599b8fe..a7d88b5ad756 100644
--- a/net/netrom/nr_in.c
+++ b/net/netrom/nr_in.c
@@ -99,7 +99,7 @@ static int nr_state1_machine(struct sock *sk, struct sk_buff *skb,
99 break; 99 break;
100 100
101 case NR_RESET: 101 case NR_RESET:
102 if (sysctl_netrom_reset_circuit); 102 if (sysctl_netrom_reset_circuit)
103 nr_disconnect(sk, ECONNRESET); 103 nr_disconnect(sk, ECONNRESET);
104 break; 104 break;
105 105
@@ -130,7 +130,7 @@ static int nr_state2_machine(struct sock *sk, struct sk_buff *skb,
130 break; 130 break;
131 131
132 case NR_RESET: 132 case NR_RESET:
133 if (sysctl_netrom_reset_circuit); 133 if (sysctl_netrom_reset_circuit)
134 nr_disconnect(sk, ECONNRESET); 134 nr_disconnect(sk, ECONNRESET);
135 break; 135 break;
136 136
@@ -265,7 +265,7 @@ static int nr_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype
265 break; 265 break;
266 266
267 case NR_RESET: 267 case NR_RESET:
268 if (sysctl_netrom_reset_circuit); 268 if (sysctl_netrom_reset_circuit)
269 nr_disconnect(sk, ECONNRESET); 269 nr_disconnect(sk, ECONNRESET);
270 break; 270 break;
271 271
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 499ae3df4a44..3e2462760413 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1587,23 +1587,47 @@ static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order)
1587 return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1); 1587 return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1);
1588} 1588}
1589 1589
1590static void free_pg_vec(char **pg_vec, unsigned order, unsigned len) 1590static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
1591{ 1591{
1592 int i; 1592 int i;
1593 1593
1594 for (i=0; i<len; i++) { 1594 for (i = 0; i < len; i++) {
1595 if (pg_vec[i]) { 1595 if (likely(pg_vec[i]))
1596 struct page *page, *pend; 1596 free_pages((unsigned long) pg_vec[i], order);
1597
1598 pend = pg_vec_endpage(pg_vec[i], order);
1599 for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
1600 ClearPageReserved(page);
1601 free_pages((unsigned long)pg_vec[i], order);
1602 }
1603 } 1597 }
1604 kfree(pg_vec); 1598 kfree(pg_vec);
1605} 1599}
1606 1600
1601static inline char *alloc_one_pg_vec_page(unsigned long order)
1602{
1603 return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
1604 order);
1605}
1606
1607static char **alloc_pg_vec(struct tpacket_req *req, int order)
1608{
1609 unsigned int block_nr = req->tp_block_nr;
1610 char **pg_vec;
1611 int i;
1612
1613 pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
1614 if (unlikely(!pg_vec))
1615 goto out;
1616
1617 for (i = 0; i < block_nr; i++) {
1618 pg_vec[i] = alloc_one_pg_vec_page(order);
1619 if (unlikely(!pg_vec[i]))
1620 goto out_free_pgvec;
1621 }
1622
1623out:
1624 return pg_vec;
1625
1626out_free_pgvec:
1627 free_pg_vec(pg_vec, order, block_nr);
1628 pg_vec = NULL;
1629 goto out;
1630}
1607 1631
1608static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing) 1632static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
1609{ 1633{
@@ -1617,64 +1641,46 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
1617 1641
1618 /* Sanity tests and some calculations */ 1642 /* Sanity tests and some calculations */
1619 1643
1620 if (po->pg_vec) 1644 if (unlikely(po->pg_vec))
1621 return -EBUSY; 1645 return -EBUSY;
1622 1646
1623 if ((int)req->tp_block_size <= 0) 1647 if (unlikely((int)req->tp_block_size <= 0))
1624 return -EINVAL; 1648 return -EINVAL;
1625 if (req->tp_block_size&(PAGE_SIZE-1)) 1649 if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
1626 return -EINVAL; 1650 return -EINVAL;
1627 if (req->tp_frame_size < TPACKET_HDRLEN) 1651 if (unlikely(req->tp_frame_size < TPACKET_HDRLEN))
1628 return -EINVAL; 1652 return -EINVAL;
1629 if (req->tp_frame_size&(TPACKET_ALIGNMENT-1)) 1653 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
1630 return -EINVAL; 1654 return -EINVAL;
1631 1655
1632 po->frames_per_block = req->tp_block_size/req->tp_frame_size; 1656 po->frames_per_block = req->tp_block_size/req->tp_frame_size;
1633 if (po->frames_per_block <= 0) 1657 if (unlikely(po->frames_per_block <= 0))
1634 return -EINVAL; 1658 return -EINVAL;
1635 if (po->frames_per_block*req->tp_block_nr != req->tp_frame_nr) 1659 if (unlikely((po->frames_per_block * req->tp_block_nr) !=
1660 req->tp_frame_nr))
1636 return -EINVAL; 1661 return -EINVAL;
1637 /* OK! */
1638
1639 /* Allocate page vector */
1640 while ((PAGE_SIZE<<order) < req->tp_block_size)
1641 order++;
1642 1662
1643 err = -ENOMEM; 1663 err = -ENOMEM;
1644 1664 order = get_order(req->tp_block_size);
1645 pg_vec = kmalloc(req->tp_block_nr*sizeof(char *), GFP_KERNEL); 1665 pg_vec = alloc_pg_vec(req, order);
1646 if (pg_vec == NULL) 1666 if (unlikely(!pg_vec))
1647 goto out; 1667 goto out;
1648 memset(pg_vec, 0, req->tp_block_nr*sizeof(char **));
1649
1650 for (i=0; i<req->tp_block_nr; i++) {
1651 struct page *page, *pend;
1652 pg_vec[i] = (char *)__get_free_pages(GFP_KERNEL, order);
1653 if (!pg_vec[i])
1654 goto out_free_pgvec;
1655
1656 pend = pg_vec_endpage(pg_vec[i], order);
1657 for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
1658 SetPageReserved(page);
1659 }
1660 /* Page vector is allocated */
1661 1668
1662 l = 0; 1669 l = 0;
1663 for (i=0; i<req->tp_block_nr; i++) { 1670 for (i = 0; i < req->tp_block_nr; i++) {
1664 char *ptr = pg_vec[i]; 1671 char *ptr = pg_vec[i];
1665 struct tpacket_hdr *header; 1672 struct tpacket_hdr *header;
1666 int k; 1673 int k;
1667 1674
1668 for (k=0; k<po->frames_per_block; k++) { 1675 for (k = 0; k < po->frames_per_block; k++) {
1669 1676 header = (struct tpacket_hdr *) ptr;
1670 header = (struct tpacket_hdr*)ptr;
1671 header->tp_status = TP_STATUS_KERNEL; 1677 header->tp_status = TP_STATUS_KERNEL;
1672 ptr += req->tp_frame_size; 1678 ptr += req->tp_frame_size;
1673 } 1679 }
1674 } 1680 }
1675 /* Done */ 1681 /* Done */
1676 } else { 1682 } else {
1677 if (req->tp_frame_nr) 1683 if (unlikely(req->tp_frame_nr))
1678 return -EINVAL; 1684 return -EINVAL;
1679 } 1685 }
1680 1686
@@ -1701,7 +1707,7 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
1701 1707
1702 spin_lock_bh(&sk->sk_receive_queue.lock); 1708 spin_lock_bh(&sk->sk_receive_queue.lock);
1703 pg_vec = XC(po->pg_vec, pg_vec); 1709 pg_vec = XC(po->pg_vec, pg_vec);
1704 po->frame_max = req->tp_frame_nr-1; 1710 po->frame_max = (req->tp_frame_nr - 1);
1705 po->head = 0; 1711 po->head = 0;
1706 po->frame_size = req->tp_frame_size; 1712 po->frame_size = req->tp_frame_size;
1707 spin_unlock_bh(&sk->sk_receive_queue.lock); 1713 spin_unlock_bh(&sk->sk_receive_queue.lock);
@@ -1728,7 +1734,6 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
1728 1734
1729 release_sock(sk); 1735 release_sock(sk);
1730 1736
1731out_free_pgvec:
1732 if (pg_vec) 1737 if (pg_vec)
1733 free_pg_vec(pg_vec, order, req->tp_block_nr); 1738 free_pg_vec(pg_vec, order, req->tp_block_nr);
1734out: 1739out:
@@ -1755,17 +1760,19 @@ static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_st
1755 if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE) 1760 if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
1756 goto out; 1761 goto out;
1757 1762
1758 atomic_inc(&po->mapped);
1759 start = vma->vm_start; 1763 start = vma->vm_start;
1760 err = -EAGAIN; 1764 for (i = 0; i < po->pg_vec_len; i++) {
1761 for (i=0; i<po->pg_vec_len; i++) { 1765 struct page *page = virt_to_page(po->pg_vec[i]);
1762 if (remap_pfn_range(vma, start, 1766 int pg_num;
1763 __pa(po->pg_vec[i]) >> PAGE_SHIFT, 1767
1764 po->pg_vec_pages*PAGE_SIZE, 1768 for (pg_num = 0; pg_num < po->pg_vec_pages; pg_num++, page++) {
1765 vma->vm_page_prot)) 1769 err = vm_insert_page(vma, start, page);
1766 goto out; 1770 if (unlikely(err))
1767 start += po->pg_vec_pages*PAGE_SIZE; 1771 goto out;
1772 start += PAGE_SIZE;
1773 }
1768 } 1774 }
1775 atomic_inc(&po->mapped);
1769 vma->vm_ops = &packet_mmap_ops; 1776 vma->vm_ops = &packet_mmap_ops;
1770 err = 0; 1777 err = 0;
1771 1778
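The packet_set_ring()/packet_mmap() rework above keeps the kernel-side allocation (__GFP_COMP pages, vm_insert_page() instead of remap_pfn_range()) invisible to userspace, which still just requests a ring and maps it. A minimal sketch of that userspace side, with sizes chosen to satisfy the sanity checks in the code above (block size a multiple of PAGE_SIZE, frame size TPACKET_ALIGNMENT-aligned and at least TPACKET_HDRLEN, frames_per_block * block_nr == frame_nr); needs CAP_NET_RAW to run:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/mman.h>
#include <arpa/inet.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>

int main(void)
{
	struct tpacket_req req;
	size_t len;
	void *ring;
	int fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));

	if (fd < 0) { perror("socket"); return 1; }

	memset(&req, 0, sizeof(req));
	req.tp_block_size = getpagesize();  /* multiple of PAGE_SIZE */
	req.tp_frame_size = 2048;           /* TPACKET_ALIGNMENT-aligned */
	req.tp_block_nr   = 4;
	req.tp_frame_nr   = (req.tp_block_size / req.tp_frame_size)
			    * req.tp_block_nr;

	if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING,
		       &req, sizeof(req)) < 0) {
		perror("PACKET_RX_RING");
		return 1;
	}

	len = (size_t)req.tp_block_size * req.tp_block_nr;
	ring = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (ring == MAP_FAILED) { perror("mmap"); return 1; }

	/* frame headers start life as TP_STATUS_KERNEL, set above */
	printf("ring mapped at %p (%zu bytes)\n", ring, len);
	munmap(ring, len);
	close(fd);
	return 0;
}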
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 8aebe8f6d271..2ce1cb2aa2ed 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -34,7 +34,7 @@
34#include <net/sch_generic.h> 34#include <net/sch_generic.h>
35#include <net/act_api.h> 35#include <net/act_api.h>
36 36
37#if 1 /* control */ 37#if 0 /* control */
38#define DPRINTK(format, args...) printk(KERN_DEBUG format, ##args) 38#define DPRINTK(format, args...) printk(KERN_DEBUG format, ##args)
39#else 39#else
40#define DPRINTK(format, args...) 40#define DPRINTK(format, args...)
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index d890dfa8818f..9df888e932c5 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -156,10 +156,6 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
156 sizeof(struct sk_buff) + 156 sizeof(struct sk_buff) +
157 sizeof(struct sctp_chunk); 157 sizeof(struct sctp_chunk);
158 158
159 sk->sk_wmem_queued += SCTP_DATA_SNDSIZE(chunk) +
160 sizeof(struct sk_buff) +
161 sizeof(struct sctp_chunk);
162
163 atomic_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); 159 atomic_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
164} 160}
165 161
@@ -3425,7 +3421,7 @@ static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_add
3425} 3421}
3426 3422
3427static int sctp_copy_laddrs_to_user(struct sock *sk, __u16 port, 3423static int sctp_copy_laddrs_to_user(struct sock *sk, __u16 port,
3428 void * __user *to, size_t space_left) 3424 void __user **to, size_t space_left)
3429{ 3425{
3430 struct list_head *pos; 3426 struct list_head *pos;
3431 struct sctp_sockaddr_entry *addr; 3427 struct sctp_sockaddr_entry *addr;
@@ -4426,7 +4422,7 @@ cleanup:
4426 * tcp_poll(). Note that, based on these implementations, we don't 4422 * tcp_poll(). Note that, based on these implementations, we don't
4427 * lock the socket in this function, even though it seems that, 4423 * lock the socket in this function, even though it seems that,
4428 * ideally, locking or some other mechanisms can be used to ensure 4424 * ideally, locking or some other mechanisms can be used to ensure
4429 * the integrity of the counters (sndbuf and wmem_queued) used 4425 * the integrity of the counters (sndbuf and wmem_alloc) used
4430 * in this place. We assume that we don't need locks either until proven 4426 * in this place. We assume that we don't need locks either until proven
4431 * otherwise. 4427 * otherwise.
4432 * 4428 *
@@ -4833,10 +4829,6 @@ static void sctp_wfree(struct sk_buff *skb)
4833 sizeof(struct sk_buff) + 4829 sizeof(struct sk_buff) +
4834 sizeof(struct sctp_chunk); 4830 sizeof(struct sctp_chunk);
4835 4831
4836 sk->sk_wmem_queued -= SCTP_DATA_SNDSIZE(chunk) +
4837 sizeof(struct sk_buff) +
4838 sizeof(struct sctp_chunk);
4839
4840 atomic_sub(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); 4832 atomic_sub(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
4841 4833
4842 sock_wfree(skb); 4834 sock_wfree(skb);
@@ -4920,7 +4912,7 @@ void sctp_write_space(struct sock *sk)
4920 4912
4921/* Is there any sndbuf space available on the socket? 4913/* Is there any sndbuf space available on the socket?
4922 * 4914 *
4923 * Note that wmem_queued is the sum of the send buffers on all of the 4915 * Note that sk_wmem_alloc is the sum of the send buffers on all of the
4924 * associations on the same socket. For a UDP-style socket with 4916 * associations on the same socket. For a UDP-style socket with
4925 * multiple associations, it is possible for it to be "unwriteable" 4917 * multiple associations, it is possible for it to be "unwriteable"
4926 * prematurely. I assume that this is acceptable because 4918 * prematurely. I assume that this is acceptable because
@@ -4933,7 +4925,7 @@ static int sctp_writeable(struct sock *sk)
4933{ 4925{
4934 int amt = 0; 4926 int amt = 0;
4935 4927
4936 amt = sk->sk_sndbuf - sk->sk_wmem_queued; 4928 amt = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc);
4937 if (amt < 0) 4929 if (amt < 0)
4938 amt = 0; 4930 amt = 0;
4939 return amt; 4931 return amt;
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index f44f46f1d8e0..8d782282ec19 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -638,7 +638,7 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg)
638 gss_msg); 638 gss_msg);
639 atomic_inc(&gss_msg->count); 639 atomic_inc(&gss_msg->count);
640 gss_unhash_msg(gss_msg); 640 gss_unhash_msg(gss_msg);
641 if (msg->errno == -ETIMEDOUT || msg->errno == -EPIPE) { 641 if (msg->errno == -ETIMEDOUT) {
642 unsigned long now = jiffies; 642 unsigned long now = jiffies;
643 if (time_after(now, ratelimit)) { 643 if (time_after(now, ratelimit)) {
644 printk(KERN_WARNING "RPC: AUTH_GSS upcall timed out.\n" 644 printk(KERN_WARNING "RPC: AUTH_GSS upcall timed out.\n"
@@ -786,7 +786,9 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int taskflags)
786 cred->gc_flags = 0; 786 cred->gc_flags = 0;
787 cred->gc_base.cr_ops = &gss_credops; 787 cred->gc_base.cr_ops = &gss_credops;
788 cred->gc_service = gss_auth->service; 788 cred->gc_service = gss_auth->service;
789 err = gss_create_upcall(gss_auth, cred); 789 do {
790 err = gss_create_upcall(gss_auth, cred);
791 } while (err == -EAGAIN);
790 if (err < 0) 792 if (err < 0)
791 goto out_err; 793 goto out_err;
792 794
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index c76ea221798c..16a2458f38f7 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -174,7 +174,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp)
174 goto out; 174 goto out;
175 msg = (struct rpc_pipe_msg *)filp->private_data; 175 msg = (struct rpc_pipe_msg *)filp->private_data;
176 if (msg != NULL) { 176 if (msg != NULL) {
177 msg->errno = -EPIPE; 177 msg->errno = -EAGAIN;
178 list_del_init(&msg->list); 178 list_del_init(&msg->list);
179 rpci->ops->destroy_msg(msg); 179 rpci->ops->destroy_msg(msg);
180 } 180 }
@@ -183,7 +183,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp)
183 if (filp->f_mode & FMODE_READ) 183 if (filp->f_mode & FMODE_READ)
184 rpci->nreaders --; 184 rpci->nreaders --;
185 if (!rpci->nreaders) 185 if (!rpci->nreaders)
186 __rpc_purge_upcall(inode, -EPIPE); 186 __rpc_purge_upcall(inode, -EAGAIN);
187 if (rpci->ops->release_pipe) 187 if (rpci->ops->release_pipe)
188 rpci->ops->release_pipe(inode); 188 rpci->ops->release_pipe(inode);
189out: 189out:
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 0a51fd46a848..77e8800d4127 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -990,6 +990,7 @@ static void xs_udp_connect_worker(void *args)
990 sk->sk_data_ready = xs_udp_data_ready; 990 sk->sk_data_ready = xs_udp_data_ready;
991 sk->sk_write_space = xs_udp_write_space; 991 sk->sk_write_space = xs_udp_write_space;
992 sk->sk_no_check = UDP_CSUM_NORCV; 992 sk->sk_no_check = UDP_CSUM_NORCV;
993 sk->sk_allocation = GFP_ATOMIC;
993 994
994 xprt_set_connected(xprt); 995 xprt_set_connected(xprt);
995 996
@@ -1074,6 +1075,7 @@ static void xs_tcp_connect_worker(void *args)
1074 sk->sk_data_ready = xs_tcp_data_ready; 1075 sk->sk_data_ready = xs_tcp_data_ready;
1075 sk->sk_state_change = xs_tcp_state_change; 1076 sk->sk_state_change = xs_tcp_state_change;
1076 sk->sk_write_space = xs_tcp_write_space; 1077 sk->sk_write_space = xs_tcp_write_space;
1078 sk->sk_allocation = GFP_ATOMIC;
1077 1079
1078 /* socket options */ 1080 /* socket options */
1079 sk->sk_userlocks |= SOCK_BINDPORT_LOCK; 1081 sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 0db9e57013fd..d19e274b9c4a 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -346,6 +346,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
346 struct xfrm_policy *pol, **p; 346 struct xfrm_policy *pol, **p;
347 struct xfrm_policy *delpol = NULL; 347 struct xfrm_policy *delpol = NULL;
348 struct xfrm_policy **newpos = NULL; 348 struct xfrm_policy **newpos = NULL;
349 struct dst_entry *gc_list;
349 350
350 write_lock_bh(&xfrm_policy_lock); 351 write_lock_bh(&xfrm_policy_lock);
351 for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) { 352 for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) {
@@ -381,9 +382,36 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
381 xfrm_pol_hold(policy); 382 xfrm_pol_hold(policy);
382 write_unlock_bh(&xfrm_policy_lock); 383 write_unlock_bh(&xfrm_policy_lock);
383 384
384 if (delpol) { 385 if (delpol)
385 xfrm_policy_kill(delpol); 386 xfrm_policy_kill(delpol);
387
388 read_lock_bh(&xfrm_policy_lock);
389 gc_list = NULL;
390 for (policy = policy->next; policy; policy = policy->next) {
391 struct dst_entry *dst;
392
393 write_lock(&policy->lock);
394 dst = policy->bundles;
395 if (dst) {
396 struct dst_entry *tail = dst;
397 while (tail->next)
398 tail = tail->next;
399 tail->next = gc_list;
400 gc_list = dst;
401
402 policy->bundles = NULL;
403 }
404 write_unlock(&policy->lock);
405 }
406 read_unlock_bh(&xfrm_policy_lock);
407
408 while (gc_list) {
409 struct dst_entry *dst = gc_list;
410
411 gc_list = dst->next;
412 dst_free(dst);
386 } 413 }
414
387 return 0; 415 return 0;
388} 416}
389EXPORT_SYMBOL(xfrm_policy_insert); 417EXPORT_SYMBOL(xfrm_policy_insert);
@@ -1014,13 +1042,12 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
1014} 1042}
1015EXPORT_SYMBOL(__xfrm_route_forward); 1043EXPORT_SYMBOL(__xfrm_route_forward);
1016 1044
1017/* Optimize later using cookies and generation ids. */
1018
1019static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) 1045static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
1020{ 1046{
1021 if (!stale_bundle(dst)) 1047 /* If it is marked obsolete, which is how we even get here,
1022 return dst; 1048 * then we have purged it from the policy bundle list and we
1023 1049 * did that for a good reason.
1050 */
1024 return NULL; 1051 return NULL;
1025} 1052}
1026 1053
@@ -1104,6 +1131,16 @@ int xfrm_flush_bundles(void)
1104 return 0; 1131 return 0;
1105} 1132}
1106 1133
1134static int always_true(struct dst_entry *dst)
1135{
1136 return 1;
1137}
1138
1139void xfrm_flush_all_bundles(void)
1140{
1141 xfrm_prune_bundles(always_true);
1142}
1143
1107void xfrm_init_pmtu(struct dst_entry *dst) 1144void xfrm_init_pmtu(struct dst_entry *dst)
1108{ 1145{
1109 do { 1146 do {
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 7cf48aa6c95b..479effc97666 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -431,6 +431,8 @@ void xfrm_state_insert(struct xfrm_state *x)
431 spin_lock_bh(&xfrm_state_lock); 431 spin_lock_bh(&xfrm_state_lock);
432 __xfrm_state_insert(x); 432 __xfrm_state_insert(x);
433 spin_unlock_bh(&xfrm_state_lock); 433 spin_unlock_bh(&xfrm_state_lock);
434
435 xfrm_flush_all_bundles();
434} 436}
435EXPORT_SYMBOL(xfrm_state_insert); 437EXPORT_SYMBOL(xfrm_state_insert);
436 438
@@ -478,6 +480,9 @@ out:
478 spin_unlock_bh(&xfrm_state_lock); 480 spin_unlock_bh(&xfrm_state_lock);
479 xfrm_state_put_afinfo(afinfo); 481 xfrm_state_put_afinfo(afinfo);
480 482
483 if (!err)
484 xfrm_flush_all_bundles();
485
481 if (x1) { 486 if (x1) {
482 xfrm_state_delete(x1); 487 xfrm_state_delete(x1);
483 xfrm_state_put(x1); 488 xfrm_state_put(x1);
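The gc_list handling added to xfrm_policy_insert() above follows a common pattern: splice the stale bundles onto a private list while holding the lock, then destroy them only after dropping it, so dst_free() never runs under xfrm_policy_lock. A generic pthread sketch of the same detach-then-free pattern, names illustrative:

#include <stdlib.h>
#include <pthread.h>

struct bundle {
	struct bundle *next;
};

static pthread_mutex_t policy_lock = PTHREAD_MUTEX_INITIALIZER;
static struct bundle *bundles;  /* protected by policy_lock */

static void flush_bundles(void)
{
	struct bundle *gc_list, *tmp;

	pthread_mutex_lock(&policy_lock);
	gc_list = bundles;          /* splice the whole chain off */
	bundles = NULL;
	pthread_mutex_unlock(&policy_lock);

	while (gc_list) {           /* free with no lock held */
		tmp = gc_list;
		gc_list = gc_list->next;
		free(tmp);
	}
}

int main(void)
{
	struct bundle *b = calloc(1, sizeof(*b));

	if (!b)
		return 1;
	b->next = bundles;
	bundles = b;
	flush_bundles();
	return 0;
}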