Diffstat (limited to 'net/ipv4'):

 net/ipv4/devinet.c                      |   3
 net/ipv4/fib_trie.c                     |   2
 net/ipv4/icmp.c                         |   5
 net/ipv4/ip_output.c                    |  88
 net/ipv4/netfilter/ip_conntrack_core.c  | 132
 net/ipv4/proc.c                         |   4
 net/ipv4/tcp_input.c                    |   1
 7 files changed, 180 insertions(+), 55 deletions(-)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 74f2207e131a..4ec4b2ca6ab1 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -715,6 +715,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
 			break;
 		ret = 0;
 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
+			u32 old_mask = ifa->ifa_mask;
 			inet_del_ifa(in_dev, ifap, 0);
 			ifa->ifa_mask = sin->sin_addr.s_addr;
 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
@@ -728,7 +729,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
 		if ((dev->flags & IFF_BROADCAST) &&
 		    (ifa->ifa_prefixlen < 31) &&
 		    (ifa->ifa_broadcast ==
-		     (ifa->ifa_local|~ifa->ifa_mask))) {
+		     (ifa->ifa_local|~old_mask))) {
 			ifa->ifa_broadcast = (ifa->ifa_local |
 					      ~sin->sin_addr.s_addr);
 		}
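
The devinet.c change fixes SIOCSIFNETMASK handling: the auto-derived broadcast address should be rewritten only if it still matches the one computed from the old netmask, but by the time of that test ifa_mask has already been overwritten, so the old code compared against the new mask. The fix snapshots old_mask first. A standalone sketch of the corrected logic, with hypothetical addresses and variable names (not kernel code):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t local    = 0xc0a80105;	/* 192.168.1.5 */
	uint32_t old_mask = 0xffffff00;	/* old /24 netmask */
	uint32_t new_mask = 0xffff0000;	/* new /16 netmask */
	uint32_t brd      = local | ~old_mask;	/* auto-derived broadcast */

	/* Only rewrite the broadcast if the admin never overrode it,
	 * i.e. it still equals the address derived from the *old* mask. */
	if (brd == (local | ~old_mask))
		brd = local | ~new_mask;

	printf("new broadcast: %08x\n", brd);	/* prints c0a8ffff */
	return 0;
}
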
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 0093ea08c7f5..66247f38b371 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2404,7 +2404,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
 	prefix = htonl(l->key);
 
 	list_for_each_entry_rcu(fa, &li->falh, fa_list) {
-		const struct fib_info *fi = rcu_dereference(fa->fa_info);
+		const struct fib_info *fi = fa->fa_info;
 		unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
 
 		if (fa->fa_type == RTN_BROADCAST
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 90dca711ac9f..175e093ec564 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1108,12 +1108,9 @@ void __init icmp_init(struct net_proto_family *ops)
 	struct inet_sock *inet;
 	int i;
 
-	for (i = 0; i < NR_CPUS; i++) {
+	for_each_cpu(i) {
 		int err;
 
-		if (!cpu_possible(i))
-			continue;
-
 		err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP,
 				       &per_cpu(__icmp_socket, i));
 
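
The icmp.c hunk converts an open-coded scan of all NR_CPUS slots into the for_each_cpu() iterator; assuming the iterator of this era expands to for_each_cpu_mask(i, cpu_possible_map), the explicit cpu_possible() check becomes redundant. The proc.c hunk further down is the same conversion. A kernel-style sketch of the equivalence, under that assumption:

/* Old pattern: probe every slot, skip impossible CPUs by hand. */
for (i = 0; i < NR_CPUS; i++) {
	if (!cpu_possible(i))
		continue;
	/* ... per-CPU work ... */
}

/* New pattern: the iterator yields only possible CPUs. */
for_each_cpu(i) {
	/* ... per-CPU work ... */
}
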
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 1ad5202e556b..17758234a3e3 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -275,7 +275,8 @@ int ip_output(struct sk_buff *skb)
 {
 	IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
 
-	if (skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->tso_size)
+	if (skb->len > dst_mtu(skb->dst) &&
+	    !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
 		return ip_fragment(skb, ip_finish_output);
 	else
 		return ip_finish_output(skb);
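
With UFO, a UDP datagram larger than the MTU may legitimately reach ip_output(), so the fragmentation test must treat ufo_size like tso_size: the NIC will do the splitting. A hypothetical helper, invented here to state the intent (not part of the patch):

/* Hypothetical refactoring for illustration only: an over-MTU packet
 * needs software fragmentation only when no offload (TSO or UFO)
 * will segment it in hardware. */
static inline int ip_needs_sw_fragmenting(const struct sk_buff *skb)
{
	return skb->len > dst_mtu(skb->dst) &&
	       !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size);
}
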
@@ -688,6 +689,60 @@ csum_page(struct page *page, int offset, int copy)
 	return csum;
 }
 
+inline int ip_ufo_append_data(struct sock *sk,
+			int getfrag(void *from, char *to, int offset, int len,
+				    int odd, struct sk_buff *skb),
+			void *from, int length, int hh_len, int fragheaderlen,
+			int transhdrlen, int mtu, unsigned int flags)
+{
+	struct sk_buff *skb;
+	int err;
+
+	/* There is support for UDP fragmentation offload by network
+	 * device, so create one single skb packet containing complete
+	 * udp datagram
+	 */
+	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
+		skb = sock_alloc_send_skb(sk,
+			hh_len + fragheaderlen + transhdrlen + 20,
+			(flags & MSG_DONTWAIT), &err);
+
+		if (skb == NULL)
+			return err;
+
+		/* reserve space for Hardware header */
+		skb_reserve(skb, hh_len);
+
+		/* create space for UDP/IP header */
+		skb_put(skb, fragheaderlen + transhdrlen);
+
+		/* initialize network header pointer */
+		skb->nh.raw = skb->data;
+
+		/* initialize protocol header pointer */
+		skb->h.raw = skb->data + fragheaderlen;
+
+		skb->ip_summed = CHECKSUM_HW;
+		skb->csum = 0;
+		sk->sk_sndmsg_off = 0;
+	}
+
+	err = skb_append_datato_frags(sk, skb, getfrag, from,
+				      (length - transhdrlen));
+	if (!err) {
+		/* specify the length of each IP datagram fragment */
+		skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen);
+		__skb_queue_tail(&sk->sk_write_queue, skb);
+
+		return 0;
+	}
+	/* There is not enough support to do UFO,
+	 * so follow the normal path
+	 */
+	kfree_skb(skb);
+	return err;
+}
+
 /*
  * ip_append_data() and ip_append_page() can make one large IP datagram
  * from many pieces of data. Each pieces will be holded on the socket
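
ip_ufo_append_data() builds a single oversized skb for the whole datagram: the IP/UDP headers go in the linear area, skb_append_datato_frags() parks the payload in page fragments, and ufo_size tells the device how much payload each on-the-wire fragment may carry. Illustrative arithmetic with assumed values (not from the patch):

/* Worked example, assuming an Ethernet MTU and a bare IPv4 header. */
unsigned int mtu           = 1500;
unsigned int fragheaderlen = 20;			/* IPv4 header */
unsigned int ufo_size      = mtu - fragheaderlen;	/* 1480 bytes per fragment */

/* A maximal UDP datagram (65515 bytes of IP payload, UDP header
 * included) then leaves the device as ceil(65515 / 1480) = 45
 * hardware-generated fragments. */
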
@@ -777,6 +832,15 @@ int ip_append_data(struct sock *sk,
 		csummode = CHECKSUM_HW;
 
 	inet->cork.length += length;
+	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
+	    (rt->u.dst.dev->features & NETIF_F_UFO)) {
+
+		if (ip_ufo_append_data(sk, getfrag, from, length, hh_len,
+				       fragheaderlen, transhdrlen, mtu, flags))
+			goto error;
+
+		return 0;
+	}
 
 	/* So, what's going on in the loop below?
 	 *
@@ -1008,14 +1072,23 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
 		return -EINVAL;
 
 	inet->cork.length += size;
+	if ((sk->sk_protocol == IPPROTO_UDP) &&
+	    (rt->u.dst.dev->features & NETIF_F_UFO))
+		skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen);
+
 
 	while (size > 0) {
 		int i;
 
-		/* Check if the remaining data fits into current packet. */
-		len = mtu - skb->len;
-		if (len < size)
-			len = maxfraglen - skb->len;
+		if (skb_shinfo(skb)->ufo_size)
+			len = size;
+		else {
+
+			/* Check if the remaining data fits into current packet. */
+			len = mtu - skb->len;
+			if (len < size)
+				len = maxfraglen - skb->len;
+		}
 		if (len <= 0) {
 			struct sk_buff *skb_prev;
 			char *data;
@@ -1023,10 +1096,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
 			int alloclen;
 
 			skb_prev = skb;
-			if (skb_prev)
-				fraggap = skb_prev->len - maxfraglen;
-			else
-				fraggap = 0;
+			fraggap = skb_prev->len - maxfraglen;
 
 			alloclen = fragheaderlen + hh_len + fraggap + 15;
 			skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 07a80b56e8dc..422ab68ee7fb 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -50,7 +50,7 @@
 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
 #include <linux/netfilter_ipv4/listhelp.h>
 
-#define IP_CONNTRACK_VERSION	"2.3"
+#define IP_CONNTRACK_VERSION	"2.4"
 
 #if 0
 #define DEBUGP printk
@@ -148,16 +148,20 @@ DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
 static int ip_conntrack_hash_rnd_initted;
 static unsigned int ip_conntrack_hash_rnd;
 
-static u_int32_t
-hash_conntrack(const struct ip_conntrack_tuple *tuple)
+static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple,
+				  unsigned int size, unsigned int rnd)
 {
-#if 0
-	dump_tuple(tuple);
-#endif
 	return (jhash_3words(tuple->src.ip,
 			     (tuple->dst.ip ^ tuple->dst.protonum),
 			     (tuple->src.u.all | (tuple->dst.u.all << 16)),
-			     ip_conntrack_hash_rnd) % ip_conntrack_htable_size);
+			     rnd) % size);
+}
+
+static u_int32_t
+hash_conntrack(const struct ip_conntrack_tuple *tuple)
+{
+	return __hash_conntrack(tuple, ip_conntrack_htable_size,
+				ip_conntrack_hash_rnd);
 }
 
 int
@@ -1341,14 +1345,13 @@ static int kill_all(struct ip_conntrack *i, void *data)
 	return 1;
 }
 
-static void free_conntrack_hash(void)
+static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size)
 {
-	if (ip_conntrack_vmalloc)
-		vfree(ip_conntrack_hash);
+	if (vmalloced)
+		vfree(hash);
 	else
-		free_pages((unsigned long)ip_conntrack_hash,
-			   get_order(sizeof(struct list_head)
-				     * ip_conntrack_htable_size));
+		free_pages((unsigned long)hash,
+			   get_order(sizeof(struct list_head) * size));
 }
 
 void ip_conntrack_flush()
@@ -1378,12 +1381,83 @@ void ip_conntrack_cleanup(void)
 	ip_conntrack_flush();
 	kmem_cache_destroy(ip_conntrack_cachep);
 	kmem_cache_destroy(ip_conntrack_expect_cachep);
-	free_conntrack_hash();
+	free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
+			    ip_conntrack_htable_size);
 	nf_unregister_sockopt(&so_getorigdst);
 }
 
-static int hashsize;
-module_param(hashsize, int, 0400);
+static struct list_head *alloc_hashtable(int size, int *vmalloced)
+{
+	struct list_head *hash;
+	unsigned int i;
+
+	*vmalloced = 0;
+	hash = (void *)__get_free_pages(GFP_KERNEL,
+					get_order(sizeof(struct list_head)
+						  * size));
+	if (!hash) {
+		*vmalloced = 1;
+		printk(KERN_WARNING "ip_conntrack: falling back to vmalloc.\n");
+		hash = vmalloc(sizeof(struct list_head) * size);
+	}
+
+	if (hash)
+		for (i = 0; i < size; i++)
+			INIT_LIST_HEAD(&hash[i]);
+
+	return hash;
+}
+
+int set_hashsize(const char *val, struct kernel_param *kp)
+{
+	int i, bucket, hashsize, vmalloced;
+	int old_vmalloced, old_size;
+	int rnd;
+	struct list_head *hash, *old_hash;
+	struct ip_conntrack_tuple_hash *h;
+
+	/* On boot, we can set this without any fancy locking. */
+	if (!ip_conntrack_htable_size)
+		return param_set_int(val, kp);
+
+	hashsize = simple_strtol(val, NULL, 0);
+	if (!hashsize)
+		return -EINVAL;
+
+	hash = alloc_hashtable(hashsize, &vmalloced);
+	if (!hash)
+		return -ENOMEM;
+
+	/* We have to rehash for the new table anyway, so we also can
+	 * use a new random seed */
+	get_random_bytes(&rnd, 4);
+
+	write_lock_bh(&ip_conntrack_lock);
+	for (i = 0; i < ip_conntrack_htable_size; i++) {
+		while (!list_empty(&ip_conntrack_hash[i])) {
+			h = list_entry(ip_conntrack_hash[i].next,
+				       struct ip_conntrack_tuple_hash, list);
+			list_del(&h->list);
+			bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
+			list_add_tail(&h->list, &hash[bucket]);
+		}
+	}
+	old_size = ip_conntrack_htable_size;
+	old_vmalloced = ip_conntrack_vmalloc;
+	old_hash = ip_conntrack_hash;
+
+	ip_conntrack_htable_size = hashsize;
+	ip_conntrack_vmalloc = vmalloced;
+	ip_conntrack_hash = hash;
+	ip_conntrack_hash_rnd = rnd;
+	write_unlock_bh(&ip_conntrack_lock);
+
+	free_conntrack_hash(old_hash, old_vmalloced, old_size);
+	return 0;
+}
+
+module_param_call(hashsize, set_hashsize, param_get_uint,
+		  &ip_conntrack_htable_size, 0600);
 
 int __init ip_conntrack_init(void)
 {
@@ -1392,9 +1466,7 @@ int __init ip_conntrack_init(void)
 
 	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
 	 * machine has 256 buckets. >= 1GB machines have 8192 buckets. */
-	if (hashsize) {
-		ip_conntrack_htable_size = hashsize;
-	} else {
+	if (!ip_conntrack_htable_size) {
 		ip_conntrack_htable_size
 			= (((num_physpages << PAGE_SHIFT) / 16384)
 			   / sizeof(struct list_head));
@@ -1416,20 +1488,8 @@ int __init ip_conntrack_init(void)
 		return ret;
 	}
 
-	/* AK: the hash table is twice as big than needed because it
-	   uses list_head.  it would be much nicer to caches to use a
-	   single pointer list head here. */
-	ip_conntrack_vmalloc = 0;
-	ip_conntrack_hash
-		= (void *)__get_free_pages(GFP_KERNEL,
-					   get_order(sizeof(struct list_head)
-						     * ip_conntrack_htable_size));
-	if (!ip_conntrack_hash) {
-		ip_conntrack_vmalloc = 1;
-		printk(KERN_WARNING "ip_conntrack: falling back to vmalloc.\n");
-		ip_conntrack_hash = vmalloc(sizeof(struct list_head)
-					    * ip_conntrack_htable_size);
-	}
+	ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size,
+					    &ip_conntrack_vmalloc);
 	if (!ip_conntrack_hash) {
 		printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
 		goto err_unreg_sockopt;
@@ -1461,9 +1521,6 @@ int __init ip_conntrack_init(void)
 	ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
 	write_unlock_bh(&ip_conntrack_lock);
 
-	for (i = 0; i < ip_conntrack_htable_size; i++)
-		INIT_LIST_HEAD(&ip_conntrack_hash[i]);
-
 	/* For use by ipt_REJECT */
 	ip_ct_attach = ip_conntrack_attach;
 
@@ -1478,7 +1535,8 @@ int __init ip_conntrack_init(void)
 err_free_conntrack_slab:
 	kmem_cache_destroy(ip_conntrack_cachep);
 err_free_hash:
-	free_conntrack_hash();
+	free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
+			    ip_conntrack_htable_size);
 err_unreg_sockopt:
 	nf_unregister_sockopt(&so_getorigdst);
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index f7943ba1f43c..a65e508fbd40 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -90,9 +90,7 @@ fold_field(void *mib[], int offt)
 	unsigned long res = 0;
 	int i;
 
-	for (i = 0; i < NR_CPUS; i++) {
-		if (!cpu_possible(i))
-			continue;
+	for_each_cpu(i) {
 		res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt);
 		res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt);
 	}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 677419d0c9ad..3e98b57578dc 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2239,6 +2239,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
 			/* Note, it is the only place, where
 			 * fast path is recovered for sending TCP.
 			 */
+			tp->pred_flags = 0;
 			tcp_fast_path_check(sk, tp);
 
 			if (nwin > tp->max_window) {
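
The tcp_input.c one-liner clears the cached header-prediction flags before tcp_fast_path_check() decides whether to rebuild them: pred_flags encodes the header word (including the window) that a fast-path segment must match, so after a window update a stale value could let the fast path accept segments against the old window. A sketch, assuming the 2.6-era helper in include/net/tcp.h (an assumption, not quoted from this patch), of how the flags are re-armed:

/* Sketch of the header-prediction idea: the fast path is enabled by
 * caching the expected 4th TCP header word (data offset | ACK |
 * window), which bakes in the current send window. Zeroing
 * pred_flags therefore forces the slow path until the check above
 * rebuilds it against the new snd_wnd. */
static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
{
	tp->pred_flags = htonl((tp->tcp_header_len << 26) |
			       ntohl(TCP_FLAG_ACK) | snd_wnd);
}
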