Diffstat (limited to 'net/ipv4'):

 net/ipv4/devinet.c                      |   3
 net/ipv4/fib_trie.c                     |   2
 net/ipv4/icmp.c                         |   5
 net/ipv4/ip_output.c                    |  88
 net/ipv4/netfilter/ip_conntrack_core.c  | 132
 net/ipv4/proc.c                         |   4
 net/ipv4/tcp_input.c                    |   1
 7 files changed, 180 insertions(+), 55 deletions(-)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 74f2207e131a..4ec4b2ca6ab1 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -715,6 +715,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
 			break;
 		ret = 0;
 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
+			u32 old_mask = ifa->ifa_mask;
 			inet_del_ifa(in_dev, ifap, 0);
 			ifa->ifa_mask = sin->sin_addr.s_addr;
 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
@@ -728,7 +729,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
 		if ((dev->flags & IFF_BROADCAST) &&
 		    (ifa->ifa_prefixlen < 31) &&
 		    (ifa->ifa_broadcast ==
-		     (ifa->ifa_local|~ifa->ifa_mask))) {
+		     (ifa->ifa_local|~old_mask))) {
 			ifa->ifa_broadcast = (ifa->ifa_local |
 					      ~sin->sin_addr.s_addr);
 		}
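
The devinet.c change fixes SIOCSIFNETMASK handling: the auto-derived broadcast address should be rewritten only if it still matches the one computed from the old netmask, but by the time of that test ifa_mask has already been overwritten, so the old code compared against the new mask. The fix snapshots old_mask first. A standalone sketch of the corrected logic, with hypothetical addresses and variable names (not kernel code):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t local    = 0xc0a80105;	/* 192.168.1.5 */
	uint32_t old_mask = 0xffffff00;	/* old /24 netmask */
	uint32_t new_mask = 0xffff0000;	/* new /16 netmask */
	uint32_t brd      = local | ~old_mask;	/* auto-derived broadcast */

	/* Only rewrite the broadcast if the admin never overrode it,
	 * i.e. it still equals the address derived from the *old* mask. */
	if (brd == (local | ~old_mask))
		brd = local | ~new_mask;

	printf("new broadcast: %08x\n", brd);	/* prints c0a8ffff */
	return 0;
}
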
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 0093ea08c7f5..66247f38b371 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2404,7 +2404,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
 	prefix = htonl(l->key);
 
 	list_for_each_entry_rcu(fa, &li->falh, fa_list) {
-		const struct fib_info *fi = rcu_dereference(fa->fa_info);
+		const struct fib_info *fi = fa->fa_info;
 		unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
 
 		if (fa->fa_type == RTN_BROADCAST
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 90dca711ac9f..175e093ec564 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1108,12 +1108,9 @@ void __init icmp_init(struct net_proto_family *ops)
 	struct inet_sock *inet;
 	int i;
 
-	for (i = 0; i < NR_CPUS; i++) {
+	for_each_cpu(i) {
 		int err;
 
-		if (!cpu_possible(i))
-			continue;
-
 		err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP,
 				       &per_cpu(__icmp_socket, i));
 
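
The icmp.c hunk converts an open-coded scan of all NR_CPUS slots into the for_each_cpu() iterator; assuming the iterator of this era expands to for_each_cpu_mask(i, cpu_possible_map), the explicit cpu_possible() check becomes redundant. The proc.c hunk further down is the same conversion. A kernel-style sketch of the equivalence, under that assumption:

/* Old pattern: probe every slot, skip impossible CPUs by hand. */
for (i = 0; i < NR_CPUS; i++) {
	if (!cpu_possible(i))
		continue;
	/* ... per-CPU work ... */
}

/* New pattern: the iterator yields only possible CPUs. */
for_each_cpu(i) {
	/* ... per-CPU work ... */
}
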
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 1ad5202e556b..17758234a3e3 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -275,7 +275,8 @@ int ip_output(struct sk_buff *skb)
 {
 	IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
 
-	if (skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->tso_size)
+	if (skb->len > dst_mtu(skb->dst) &&
+	    !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
 		return ip_fragment(skb, ip_finish_output);
 	else
 		return ip_finish_output(skb);
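
With UFO, a UDP datagram larger than the MTU may legitimately reach ip_output(), so the fragmentation test must treat ufo_size like tso_size: the NIC will do the splitting. A hypothetical helper, invented here to state the intent (not part of the patch):

/* Hypothetical refactoring for illustration only: an over-MTU packet
 * needs software fragmentation only when no offload (TSO or UFO)
 * will segment it in hardware. */
static inline int ip_needs_sw_fragmenting(const struct sk_buff *skb)
{
	return skb->len > dst_mtu(skb->dst) &&
	       !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size);
}
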
@@ -688,6 +689,60 @@ csum_page(struct page *page, int offset, int copy)
 	return csum;
 }
 
+inline int ip_ufo_append_data(struct sock *sk,
+			int getfrag(void *from, char *to, int offset, int len,
+				    int odd, struct sk_buff *skb),
+			void *from, int length, int hh_len, int fragheaderlen,
+			int transhdrlen, int mtu, unsigned int flags)
+{
+	struct sk_buff *skb;
+	int err;
+
+	/* There is support for UDP fragmentation offload by network
+	 * device, so create one single skb packet containing complete
+	 * udp datagram
+	 */
+	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
+		skb = sock_alloc_send_skb(sk,
+			hh_len + fragheaderlen + transhdrlen + 20,
+			(flags & MSG_DONTWAIT), &err);
+
+		if (skb == NULL)
+			return err;
+
+		/* reserve space for Hardware header */
+		skb_reserve(skb, hh_len);
+
+		/* create space for UDP/IP header */
+		skb_put(skb, fragheaderlen + transhdrlen);
+
+		/* initialize network header pointer */
+		skb->nh.raw = skb->data;
+
+		/* initialize protocol header pointer */
+		skb->h.raw = skb->data + fragheaderlen;
+
+		skb->ip_summed = CHECKSUM_HW;
+		skb->csum = 0;
+		sk->sk_sndmsg_off = 0;
+	}
+
+	err = skb_append_datato_frags(sk, skb, getfrag, from,
+				      (length - transhdrlen));
+	if (!err) {
+		/* specify the length of each IP datagram fragment */
+		skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen);
+		__skb_queue_tail(&sk->sk_write_queue, skb);
+
+		return 0;
+	}
+	/* There is not enough support to do UFO,
+	 * so follow the normal path
+	 */
+	kfree_skb(skb);
+	return err;
+}
+
 /*
  * ip_append_data() and ip_append_page() can make one large IP datagram
  * from many pieces of data. Each pieces will be holded on the socket
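
ip_ufo_append_data() builds a single oversized skb for the whole datagram: the IP/UDP headers go in the linear area, skb_append_datato_frags() parks the payload in page fragments, and ufo_size tells the device how much payload each on-the-wire fragment may carry. Illustrative arithmetic with assumed values (not from the patch):

/* Worked example, assuming an Ethernet MTU and a bare IPv4 header. */
unsigned int mtu           = 1500;
unsigned int fragheaderlen = 20;			/* IPv4 header */
unsigned int ufo_size      = mtu - fragheaderlen;	/* 1480 bytes per fragment */

/* A maximal UDP datagram (65515 bytes of IP payload, UDP header
 * included) then leaves the device as ceil(65515 / 1480) = 45
 * hardware-generated fragments. */
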
@@ -777,6 +832,15 @@ int ip_append_data(struct sock *sk,
 		csummode = CHECKSUM_HW;
 
 	inet->cork.length += length;
+	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
+	    (rt->u.dst.dev->features & NETIF_F_UFO)) {
+
+		if (ip_ufo_append_data(sk, getfrag, from, length, hh_len,
+				       fragheaderlen, transhdrlen, mtu, flags))
+			goto error;
+
+		return 0;
+	}
 
 	/* So, what's going on in the loop below?
 	 *
@@ -1008,14 +1072,23 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
 		return -EINVAL;
 
 	inet->cork.length += size;
+	if ((sk->sk_protocol == IPPROTO_UDP) &&
+	    (rt->u.dst.dev->features & NETIF_F_UFO))
+		skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen);
+
 
 	while (size > 0) {
 		int i;
 
-		/* Check if the remaining data fits into current packet. */
-		len = mtu - skb->len;
-		if (len < size)
-			len = maxfraglen - skb->len;
+		if (skb_shinfo(skb)->ufo_size)
+			len = size;
+		else {
+
+			/* Check if the remaining data fits into current packet. */
+			len = mtu - skb->len;
+			if (len < size)
+				len = maxfraglen - skb->len;
+		}
 		if (len <= 0) {
 			struct sk_buff *skb_prev;
 			char *data;
@@ -1023,10 +1096,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
 			int alloclen;
 
 			skb_prev = skb;
-			if (skb_prev)
-				fraggap = skb_prev->len - maxfraglen;
-			else
-				fraggap = 0;
+			fraggap = skb_prev->len - maxfraglen;
 
 			alloclen = fragheaderlen + hh_len + fraggap + 15;
 			skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 07a80b56e8dc..422ab68ee7fb 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -50,7 +50,7 @@
 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
 #include <linux/netfilter_ipv4/listhelp.h>
 
-#define IP_CONNTRACK_VERSION	"2.3"
+#define IP_CONNTRACK_VERSION	"2.4"
 
 #if 0
 #define DEBUGP printk
@@ -148,16 +148,20 @@ DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
 static int ip_conntrack_hash_rnd_initted;
 static unsigned int ip_conntrack_hash_rnd;
 
-static u_int32_t
-hash_conntrack(const struct ip_conntrack_tuple *tuple)
+static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple,
+				  unsigned int size, unsigned int rnd)
 {
-#if 0
-	dump_tuple(tuple);
-#endif
 	return (jhash_3words(tuple->src.ip,
 			     (tuple->dst.ip ^ tuple->dst.protonum),
 			     (tuple->src.u.all | (tuple->dst.u.all << 16)),
-			     ip_conntrack_hash_rnd) % ip_conntrack_htable_size);
+			     rnd) % size);
+}
+
+static u_int32_t
+hash_conntrack(const struct ip_conntrack_tuple *tuple)
+{
+	return __hash_conntrack(tuple, ip_conntrack_htable_size,
+				ip_conntrack_hash_rnd);
 }
 
 int
@@ -1341,14 +1345,13 @@ static int kill_all(struct ip_conntrack *i, void *data)
 	return 1;
 }
 
-static void free_conntrack_hash(void)
+static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size)
 {
-	if (ip_conntrack_vmalloc)
-		vfree(ip_conntrack_hash);
+	if (vmalloced)
+		vfree(hash);
 	else
-		free_pages((unsigned long)ip_conntrack_hash,
-			   get_order(sizeof(struct list_head)
-				     * ip_conntrack_htable_size));
+		free_pages((unsigned long)hash,
+			   get_order(sizeof(struct list_head) * size));
 }
 
 void ip_conntrack_flush()
@@ -1378,12 +1381,83 @@ void ip_conntrack_cleanup(void)
 	ip_conntrack_flush();
 	kmem_cache_destroy(ip_conntrack_cachep);
 	kmem_cache_destroy(ip_conntrack_expect_cachep);
-	free_conntrack_hash();
+	free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
+			    ip_conntrack_htable_size);
 	nf_unregister_sockopt(&so_getorigdst);
 }
 
-static int hashsize;
-module_param(hashsize, int, 0400);
+static struct list_head *alloc_hashtable(int size, int *vmalloced)
+{
+	struct list_head *hash;
+	unsigned int i;
+
+	*vmalloced = 0;
+	hash = (void *)__get_free_pages(GFP_KERNEL,
+					get_order(sizeof(struct list_head)
+						  * size));
+	if (!hash) {
+		*vmalloced = 1;
+		printk(KERN_WARNING "ip_conntrack: falling back to vmalloc.\n");
+		hash = vmalloc(sizeof(struct list_head) * size);
+	}
+
+	if (hash)
+		for (i = 0; i < size; i++)
+			INIT_LIST_HEAD(&hash[i]);
+
+	return hash;
+}
+
+int set_hashsize(const char *val, struct kernel_param *kp)
+{
+	int i, bucket, hashsize, vmalloced;
+	int old_vmalloced, old_size;
+	int rnd;
+	struct list_head *hash, *old_hash;
+	struct ip_conntrack_tuple_hash *h;
+
+	/* On boot, we can set this without any fancy locking. */
+	if (!ip_conntrack_htable_size)
+		return param_set_int(val, kp);
+
+	hashsize = simple_strtol(val, NULL, 0);
+	if (!hashsize)
+		return -EINVAL;
+
+	hash = alloc_hashtable(hashsize, &vmalloced);
+	if (!hash)
+		return -ENOMEM;
+
+	/* We have to rehash for the new table anyway, so we also can
+	 * use a new random seed */
+	get_random_bytes(&rnd, 4);
+
+	write_lock_bh(&ip_conntrack_lock);
+	for (i = 0; i < ip_conntrack_htable_size; i++) {
+		while (!list_empty(&ip_conntrack_hash[i])) {
+			h = list_entry(ip_conntrack_hash[i].next,
+				       struct ip_conntrack_tuple_hash, list);
+			list_del(&h->list);
+			bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
+			list_add_tail(&h->list, &hash[bucket]);
+		}
+	}
+	old_size = ip_conntrack_htable_size;
+	old_vmalloced = ip_conntrack_vmalloc;
+	old_hash = ip_conntrack_hash;
+
+	ip_conntrack_htable_size = hashsize;
+	ip_conntrack_vmalloc = vmalloced;
+	ip_conntrack_hash = hash;
+	ip_conntrack_hash_rnd = rnd;
+	write_unlock_bh(&ip_conntrack_lock);
+
+	free_conntrack_hash(old_hash, old_vmalloced, old_size);
+	return 0;
+}
+
+module_param_call(hashsize, set_hashsize, param_get_uint,
+		  &ip_conntrack_htable_size, 0600);
 
 int __init ip_conntrack_init(void)
 {
@@ -1392,9 +1466,7 @@ int __init ip_conntrack_init(void)
 
 	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
 	 * machine has 256 buckets. >= 1GB machines have 8192 buckets. */
-	if (hashsize) {
-		ip_conntrack_htable_size = hashsize;
-	} else {
+	if (!ip_conntrack_htable_size) {
 		ip_conntrack_htable_size
 			= (((num_physpages << PAGE_SHIFT) / 16384)
 			   / sizeof(struct list_head));
@@ -1416,20 +1488,8 @@ int __init ip_conntrack_init(void)
 		return ret;
 	}
 
-	/* AK: the hash table is twice as big than needed because it
-	   uses list_head.  it would be much nicer to caches to use a
-	   single pointer list head here. */
-	ip_conntrack_vmalloc = 0;
-	ip_conntrack_hash
-		= (void *)__get_free_pages(GFP_KERNEL,
-					   get_order(sizeof(struct list_head)
-						     * ip_conntrack_htable_size));
-	if (!ip_conntrack_hash) {
-		ip_conntrack_vmalloc = 1;
-		printk(KERN_WARNING "ip_conntrack: falling back to vmalloc.\n");
-		ip_conntrack_hash = vmalloc(sizeof(struct list_head)
-					    * ip_conntrack_htable_size);
-	}
+	ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size,
+					    &ip_conntrack_vmalloc);
 	if (!ip_conntrack_hash) {
 		printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
 		goto err_unreg_sockopt;
@@ -1461,9 +1521,6 @@ int __init ip_conntrack_init(void)
 	ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
 	write_unlock_bh(&ip_conntrack_lock);
 
-	for (i = 0; i < ip_conntrack_htable_size; i++)
-		INIT_LIST_HEAD(&ip_conntrack_hash[i]);
-
 	/* For use by ipt_REJECT */
 	ip_ct_attach = ip_conntrack_attach;
 
@@ -1478,7 +1535,8 @@ int __init ip_conntrack_init(void)
 err_free_conntrack_slab:
 	kmem_cache_destroy(ip_conntrack_cachep);
 err_free_hash:
-	free_conntrack_hash();
+	free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
+			    ip_conntrack_htable_size);
 err_unreg_sockopt:
 	nf_unregister_sockopt(&so_getorigdst);
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index f7943ba1f43c..a65e508fbd40 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -90,9 +90,7 @@ fold_field(void *mib[], int offt)
 	unsigned long res = 0;
 	int i;
 
-	for (i = 0; i < NR_CPUS; i++) {
-		if (!cpu_possible(i))
-			continue;
+	for_each_cpu(i) {
 		res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt);
 		res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt);
 	}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 677419d0c9ad..3e98b57578dc 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2239,6 +2239,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
 			/* Note, it is the only place, where
 			 * fast path is recovered for sending TCP.
 			 */
+			tp->pred_flags = 0;
 			tcp_fast_path_check(sk, tp);
 
 			if (nwin > tp->max_window) {
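
The tcp_input.c one-liner clears the cached header-prediction flags before tcp_fast_path_check() decides whether to rebuild them: pred_flags encodes the header word (including the window) that a fast-path segment must match, so after a window update a stale value could let the fast path accept segments against the old window. A sketch, assuming the 2.6-era helper in include/net/tcp.h (an assumption, not quoted from this patch), of how the flags are re-armed:

/* Sketch of the header-prediction idea: the fast path is enabled by
 * caching the expected 4th TCP header word (data offset | ACK |
 * window), which bakes in the current send window. Zeroing
 * pred_flags therefore forces the slow path until the check above
 * rebuilds it against the new snd_wnd. */
static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
{
	tp->pred_flags = htonl((tp->tcp_header_len << 26) |
			       ntohl(TCP_FLAG_ACK) | snd_wnd);
}
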