Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/af_inet.c                      |  56
-rw-r--r--  net/ipv4/arp.c                          |  41
-rw-r--r--  net/ipv4/devinet.c                      |   6
-rw-r--r--  net/ipv4/fib_frontend.c                 |   6
-rw-r--r--  net/ipv4/fib_trie.c                     |  12
-rw-r--r--  net/ipv4/gre.c                          |   1
-rw-r--r--  net/ipv4/icmp.c                         |  14
-rw-r--r--  net/ipv4/inet_diag.c                    |   2
-rw-r--r--  net/ipv4/inet_lro.c                     |  74
-rw-r--r--  net/ipv4/inetpeer.c                     | 293
-rw-r--r--  net/ipv4/ip_fragment.c                  |   5
-rw-r--r--  net/ipv4/ip_gre.c                       |   2
-rw-r--r--  net/ipv4/ip_input.c                     |   4
-rw-r--r--  net/ipv4/ip_output.c                    |  35
-rw-r--r--  net/ipv4/ipconfig.c                     |  75
-rw-r--r--  net/ipv4/ipmr.c                         |   3
-rw-r--r--  net/ipv4/netfilter.c                    |  60
-rw-r--r--  net/ipv4/netfilter/ipt_CLUSTERIP.c      |  26
-rw-r--r--  net/ipv4/netfilter/ipt_REJECT.c         |  14
-rw-r--r--  net/ipv4/netfilter/nf_defrag_ipv4.c     |   2
-rw-r--r--  net/ipv4/netfilter/nf_nat_snmp_basic.c  | 210
-rw-r--r--  net/ipv4/netfilter/nf_nat_standalone.c  |   2
-rw-r--r--  net/ipv4/raw.c                          |  36
-rw-r--r--  net/ipv4/route.c                        |  94
-rw-r--r--  net/ipv4/syncookies.c                   |   1
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c              |  14
-rw-r--r--  net/ipv4/tcp.c                          |  10
-rw-r--r--  net/ipv4/tcp_input.c                    |  46
-rw-r--r--  net/ipv4/tcp_ipv4.c                     |  11
-rw-r--r--  net/ipv4/tcp_minisocks.c                |   6
-rw-r--r--  net/ipv4/udp.c                          |  15
-rw-r--r--  net/ipv4/xfrm4_output.c                 |   7
-rw-r--r--  net/ipv4/xfrm4_policy.c                 |   2
33 files changed, 492 insertions, 693 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index eae1f676f87..1b745d412cf 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -465,8 +465,10 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
465 if (addr_len < sizeof(struct sockaddr_in)) 465 if (addr_len < sizeof(struct sockaddr_in))
466 goto out; 466 goto out;
467 467
468 if (addr->sin_family != AF_INET) 468 if (addr->sin_family != AF_INET) {
469 err = -EAFNOSUPPORT;
469 goto out; 470 goto out;
471 }
470 472
471 chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); 473 chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
472 474
@@ -1438,11 +1440,11 @@ EXPORT_SYMBOL_GPL(inet_ctl_sock_create);
1438unsigned long snmp_fold_field(void __percpu *mib[], int offt) 1440unsigned long snmp_fold_field(void __percpu *mib[], int offt)
1439{ 1441{
1440 unsigned long res = 0; 1442 unsigned long res = 0;
1441 int i; 1443 int i, j;
1442 1444
1443 for_each_possible_cpu(i) { 1445 for_each_possible_cpu(i) {
1444 res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt); 1446 for (j = 0; j < SNMP_ARRAY_SZ; j++)
1445 res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt); 1447 res += *(((unsigned long *) per_cpu_ptr(mib[j], i)) + offt);
1446 } 1448 }
1447 return res; 1449 return res;
1448} 1450}
@@ -1456,28 +1458,19 @@ u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset)
1456 int cpu; 1458 int cpu;
1457 1459
1458 for_each_possible_cpu(cpu) { 1460 for_each_possible_cpu(cpu) {
1459 void *bhptr, *userptr; 1461 void *bhptr;
1460 struct u64_stats_sync *syncp; 1462 struct u64_stats_sync *syncp;
1461 u64 v_bh, v_user; 1463 u64 v;
1462 unsigned int start; 1464 unsigned int start;
1463 1465
1464 /* first mib used by softirq context, we must use _bh() accessors */ 1466 bhptr = per_cpu_ptr(mib[0], cpu);
1465 bhptr = per_cpu_ptr(SNMP_STAT_BHPTR(mib), cpu);
1466 syncp = (struct u64_stats_sync *)(bhptr + syncp_offset); 1467 syncp = (struct u64_stats_sync *)(bhptr + syncp_offset);
1467 do { 1468 do {
1468 start = u64_stats_fetch_begin_bh(syncp); 1469 start = u64_stats_fetch_begin_bh(syncp);
1469 v_bh = *(((u64 *) bhptr) + offt); 1470 v = *(((u64 *) bhptr) + offt);
1470 } while (u64_stats_fetch_retry_bh(syncp, start)); 1471 } while (u64_stats_fetch_retry_bh(syncp, start));
1471 1472
1472 /* second mib used in USER context */ 1473 res += v;
1473 userptr = per_cpu_ptr(SNMP_STAT_USRPTR(mib), cpu);
1474 syncp = (struct u64_stats_sync *)(userptr + syncp_offset);
1475 do {
1476 start = u64_stats_fetch_begin(syncp);
1477 v_user = *(((u64 *) userptr) + offt);
1478 } while (u64_stats_fetch_retry(syncp, start));
1479
1480 res += v_bh + v_user;
1481 } 1474 }
1482 return res; 1475 return res;
1483} 1476}
@@ -1489,25 +1482,28 @@ int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align)
1489 BUG_ON(ptr == NULL); 1482 BUG_ON(ptr == NULL);
1490 ptr[0] = __alloc_percpu(mibsize, align); 1483 ptr[0] = __alloc_percpu(mibsize, align);
1491 if (!ptr[0]) 1484 if (!ptr[0])
1492 goto err0; 1485 return -ENOMEM;
1486#if SNMP_ARRAY_SZ == 2
1493 ptr[1] = __alloc_percpu(mibsize, align); 1487 ptr[1] = __alloc_percpu(mibsize, align);
1494 if (!ptr[1]) 1488 if (!ptr[1]) {
1495 goto err1; 1489 free_percpu(ptr[0]);
1490 ptr[0] = NULL;
1491 return -ENOMEM;
1492 }
1493#endif
1496 return 0; 1494 return 0;
1497err1:
1498 free_percpu(ptr[0]);
1499 ptr[0] = NULL;
1500err0:
1501 return -ENOMEM;
1502} 1495}
1503EXPORT_SYMBOL_GPL(snmp_mib_init); 1496EXPORT_SYMBOL_GPL(snmp_mib_init);
1504 1497
1505void snmp_mib_free(void __percpu *ptr[2]) 1498void snmp_mib_free(void __percpu *ptr[SNMP_ARRAY_SZ])
1506{ 1499{
1500 int i;
1501
1507 BUG_ON(ptr == NULL); 1502 BUG_ON(ptr == NULL);
1508 free_percpu(ptr[0]); 1503 for (i = 0; i < SNMP_ARRAY_SZ; i++) {
1509 free_percpu(ptr[1]); 1504 free_percpu(ptr[i]);
1510 ptr[0] = ptr[1] = NULL; 1505 ptr[i] = NULL;
1506 }
1511} 1507}
1512EXPORT_SYMBOL_GPL(snmp_mib_free); 1508EXPORT_SYMBOL_GPL(snmp_mib_free);
1513 1509
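
The af_inet.c hunks above replace the fixed pair of per-cpu mib pointers (one for BH context, one for user context) with an SNMP_ARRAY_SZ-sized array that snmp_fold_field(), snmp_mib_init() and snmp_mib_free() now loop over. A minimal user-space sketch of the new folding loop, with plain arrays and assumed values of SNMP_ARRAY_SZ, NR_CPUS and MIB_FIELDS standing in for the kernel's per-cpu machinery:

/* Sketch only: models snmp_fold_field() with ordinary arrays instead of
 * per-cpu pointers; SNMP_ARRAY_SZ, NR_CPUS and MIB_FIELDS are assumptions
 * chosen purely for illustration. */
#include <stdio.h>

#define SNMP_ARRAY_SZ 2
#define NR_CPUS       4
#define MIB_FIELDS    8

static unsigned long mib[SNMP_ARRAY_SZ][NR_CPUS][MIB_FIELDS];

static unsigned long snmp_fold_field(int offt)
{
	unsigned long res = 0;
	int i, j;

	for (i = 0; i < NR_CPUS; i++)		/* for_each_possible_cpu(i) */
		for (j = 0; j < SNMP_ARRAY_SZ; j++)
			res += mib[j][i][offt];
	return res;
}

int main(void)
{
	mib[0][1][3] = 5;
	mib[1][2][3] = 7;
	printf("%lu\n", snmp_fold_field(3));	/* prints 12 */
	return 0;
}
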
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 1b74d3b6437..96a164aa136 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -97,7 +97,6 @@
97#include <linux/init.h> 97#include <linux/init.h>
98#include <linux/net.h> 98#include <linux/net.h>
99#include <linux/rcupdate.h> 99#include <linux/rcupdate.h>
100#include <linux/jhash.h>
101#include <linux/slab.h> 100#include <linux/slab.h>
102#ifdef CONFIG_SYSCTL 101#ifdef CONFIG_SYSCTL
103#include <linux/sysctl.h> 102#include <linux/sysctl.h>
@@ -139,8 +138,6 @@ static const struct neigh_ops arp_generic_ops = {
139 .error_report = arp_error_report, 138 .error_report = arp_error_report,
140 .output = neigh_resolve_output, 139 .output = neigh_resolve_output,
141 .connected_output = neigh_connected_output, 140 .connected_output = neigh_connected_output,
142 .hh_output = dev_queue_xmit,
143 .queue_xmit = dev_queue_xmit,
144}; 141};
145 142
146static const struct neigh_ops arp_hh_ops = { 143static const struct neigh_ops arp_hh_ops = {
@@ -149,16 +146,12 @@ static const struct neigh_ops arp_hh_ops = {
149 .error_report = arp_error_report, 146 .error_report = arp_error_report,
150 .output = neigh_resolve_output, 147 .output = neigh_resolve_output,
151 .connected_output = neigh_resolve_output, 148 .connected_output = neigh_resolve_output,
152 .hh_output = dev_queue_xmit,
153 .queue_xmit = dev_queue_xmit,
154}; 149};
155 150
156static const struct neigh_ops arp_direct_ops = { 151static const struct neigh_ops arp_direct_ops = {
157 .family = AF_INET, 152 .family = AF_INET,
158 .output = dev_queue_xmit, 153 .output = neigh_direct_output,
159 .connected_output = dev_queue_xmit, 154 .connected_output = neigh_direct_output,
160 .hh_output = dev_queue_xmit,
161 .queue_xmit = dev_queue_xmit,
162}; 155};
163 156
164static const struct neigh_ops arp_broken_ops = { 157static const struct neigh_ops arp_broken_ops = {
@@ -167,8 +160,6 @@ static const struct neigh_ops arp_broken_ops = {
167 .error_report = arp_error_report, 160 .error_report = arp_error_report,
168 .output = neigh_compat_output, 161 .output = neigh_compat_output,
169 .connected_output = neigh_compat_output, 162 .connected_output = neigh_compat_output,
170 .hh_output = dev_queue_xmit,
171 .queue_xmit = dev_queue_xmit,
172}; 163};
173 164
174struct neigh_table arp_tbl = { 165struct neigh_table arp_tbl = {
@@ -232,7 +223,7 @@ static u32 arp_hash(const void *pkey,
232 const struct net_device *dev, 223 const struct net_device *dev,
233 __u32 hash_rnd) 224 __u32 hash_rnd)
234{ 225{
235 return jhash_2words(*(u32 *)pkey, dev->ifindex, hash_rnd); 226 return arp_hashfn(*(u32 *)pkey, dev, hash_rnd);
236} 227}
237 228
238static int arp_constructor(struct neighbour *neigh) 229static int arp_constructor(struct neighbour *neigh)
@@ -259,7 +250,7 @@ static int arp_constructor(struct neighbour *neigh)
259 if (!dev->header_ops) { 250 if (!dev->header_ops) {
260 neigh->nud_state = NUD_NOARP; 251 neigh->nud_state = NUD_NOARP;
261 neigh->ops = &arp_direct_ops; 252 neigh->ops = &arp_direct_ops;
262 neigh->output = neigh->ops->queue_xmit; 253 neigh->output = neigh_direct_output;
263 } else { 254 } else {
264 /* Good devices (checked by reading texts, but only Ethernet is 255 /* Good devices (checked by reading texts, but only Ethernet is
265 tested) 256 tested)
@@ -518,30 +509,6 @@ EXPORT_SYMBOL(arp_find);
518 509
519/* END OF OBSOLETE FUNCTIONS */ 510/* END OF OBSOLETE FUNCTIONS */
520 511
521int arp_bind_neighbour(struct dst_entry *dst)
522{
523 struct net_device *dev = dst->dev;
524 struct neighbour *n = dst->neighbour;
525
526 if (dev == NULL)
527 return -EINVAL;
528 if (n == NULL) {
529 __be32 nexthop = ((struct rtable *)dst)->rt_gateway;
530 if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
531 nexthop = 0;
532 n = __neigh_lookup_errno(
533#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
534 dev->type == ARPHRD_ATM ?
535 clip_tbl_hook :
536#endif
537 &arp_tbl, &nexthop, dev);
538 if (IS_ERR(n))
539 return PTR_ERR(n);
540 dst->neighbour = n;
541 }
542 return 0;
543}
544
545/* 512/*
546 * Check if we can use proxy ARP for this path 513 * Check if we can use proxy ARP for this path
547 */ 514 */
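
With hh_output and queue_xmit dropped from struct neigh_ops, arp_direct_ops now points at neigh_direct_output() rather than dev_queue_xmit(). In this kernel generation that helper is essentially a trivial wrapper, so the behaviour for header-less devices is unchanged; a sketch of what the replacement amounts to (the real definition lives in net/core/neighbour.c, not in this diff):

/* Sketch: what neigh_direct_output() boils down to for devices without a
 * link-layer header (the arp_direct_ops case above). Depends on kernel
 * headers; shown only to illustrate the substitution. */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
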
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 0d4a184af16..37b3c188d8b 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1833,8 +1833,8 @@ void __init devinet_init(void)
1833 1833
1834 rtnl_af_register(&inet_af_ops); 1834 rtnl_af_register(&inet_af_ops);
1835 1835
1836 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL); 1836 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
1837 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL); 1837 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
1838 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr); 1838 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
1839} 1839}
1840 1840
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 22524716fe7..92fc5f69f5d 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1124,9 +1124,9 @@ static struct pernet_operations fib_net_ops = {
1124 1124
1125void __init ip_fib_init(void) 1125void __init ip_fib_init(void)
1126{ 1126{
1127 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL); 1127 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
1128 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL); 1128 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
1129 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib); 1129 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
1130 1130
1131 register_pernet_subsys(&fib_net_ops); 1131 register_pernet_subsys(&fib_net_ops);
1132 register_netdevice_notifier(&fib_netdev_notifier); 1132 register_netdevice_notifier(&fib_netdev_notifier);
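
Both devinet_init() and ip_fib_init() (and ipmr/inet_diag further down) now pass an extra callback to rtnl_register()/netlink_dump_start(). In kernels of this vintage the added slot is a dump-size ("calcit") callback; the typedef names in the sketch below are assumptions inferred from how every caller here simply passes NULL:

/* Assumed shape of the widened rtnl_register() API; the typedef names are
 * guesses for illustration, only the extra NULL argument is taken from
 * the hunks above. */
struct sk_buff;
struct nlmsghdr;
struct netlink_callback;
typedef unsigned short u16;

typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, void *);
typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *);
typedef u16 (*rtnl_calcit_func)(struct sk_buff *);

void rtnl_register(int protocol, int msgtype,
		   rtnl_doit_func doit,
		   rtnl_dumpit_func dumpit,
		   rtnl_calcit_func calcit);

/* Callers that do not need the new hook pass NULL, as above:
 *   rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
 */
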
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 58c25ea5a5c..de9e2978476 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -110,9 +110,10 @@ struct leaf {
110 110
111struct leaf_info { 111struct leaf_info {
112 struct hlist_node hlist; 112 struct hlist_node hlist;
113 struct rcu_head rcu;
114 int plen; 113 int plen;
114 u32 mask_plen; /* ntohl(inet_make_mask(plen)) */
115 struct list_head falh; 115 struct list_head falh;
116 struct rcu_head rcu;
116}; 117};
117 118
118struct tnode { 119struct tnode {
@@ -451,6 +452,7 @@ static struct leaf_info *leaf_info_new(int plen)
451 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL); 452 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
452 if (li) { 453 if (li) {
453 li->plen = plen; 454 li->plen = plen;
455 li->mask_plen = ntohl(inet_make_mask(plen));
454 INIT_LIST_HEAD(&li->falh); 456 INIT_LIST_HEAD(&li->falh);
455 } 457 }
456 return li; 458 return li;
@@ -1359,10 +1361,8 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l,
1359 1361
1360 hlist_for_each_entry_rcu(li, node, hhead, hlist) { 1362 hlist_for_each_entry_rcu(li, node, hhead, hlist) {
1361 struct fib_alias *fa; 1363 struct fib_alias *fa;
1362 int plen = li->plen;
1363 __be32 mask = inet_make_mask(plen);
1364 1364
1365 if (l->key != (key & ntohl(mask))) 1365 if (l->key != (key & li->mask_plen))
1366 continue; 1366 continue;
1367 1367
1368 list_for_each_entry_rcu(fa, &li->falh, fa_list) { 1368 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
@@ -1394,7 +1394,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l,
1394#ifdef CONFIG_IP_FIB_TRIE_STATS 1394#ifdef CONFIG_IP_FIB_TRIE_STATS
1395 t->stats.semantic_match_passed++; 1395 t->stats.semantic_match_passed++;
1396#endif 1396#endif
1397 res->prefixlen = plen; 1397 res->prefixlen = li->plen;
1398 res->nh_sel = nhsel; 1398 res->nh_sel = nhsel;
1399 res->type = fa->fa_type; 1399 res->type = fa->fa_type;
1400 res->scope = fa->fa_info->fib_scope; 1400 res->scope = fa->fa_info->fib_scope;
@@ -1402,7 +1402,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l,
1402 res->table = tb; 1402 res->table = tb;
1403 res->fa_head = &li->falh; 1403 res->fa_head = &li->falh;
1404 if (!(fib_flags & FIB_LOOKUP_NOREF)) 1404 if (!(fib_flags & FIB_LOOKUP_NOREF))
1405 atomic_inc(&res->fi->fib_clntref); 1405 atomic_inc(&fi->fib_clntref);
1406 return 0; 1406 return 0;
1407 } 1407 }
1408 } 1408 }
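
The fib_trie.c change precomputes ntohl(inet_make_mask(plen)) once at leaf_info_new() time and stores it in li->mask_plen, so the per-packet lookup no longer rebuilds the mask. A standalone sketch of the host-order equivalence the lookup now relies on (addresses and prefix chosen only as an example):

/* Sketch: li->mask_plen caches ntohl(inet_make_mask(plen)), so the lookup's
 * (key & li->mask_plen) equals the old per-lookup
 * (key & ntohl(inet_make_mask(plen))). */
#include <stdio.h>
#include <stdint.h>

static uint32_t mask_plen(int plen)
{
	return plen ? ~0u << (32 - plen) : 0;	/* host-order /plen mask */
}

int main(void)
{
	uint32_t key = 0xc0a80142;		/* 192.168.1.66, host order */

	printf("%08x\n", key & mask_plen(24));	/* c0a80100 -> 192.168.1.0/24 */
	return 0;
}
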
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index c6933f2ea31..9dbe10875fb 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -16,7 +16,6 @@
16#include <linux/skbuff.h> 16#include <linux/skbuff.h>
17#include <linux/in.h> 17#include <linux/in.h>
18#include <linux/netdevice.h> 18#include <linux/netdevice.h>
19#include <linux/version.h>
20#include <linux/spinlock.h> 19#include <linux/spinlock.h>
21#include <net/protocol.h> 20#include <net/protocol.h>
22#include <net/gre.h> 21#include <net/gre.h>
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 5395e45dcce..23ef31baa1a 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -380,6 +380,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
380 struct icmp_bxm *param) 380 struct icmp_bxm *param)
381{ 381{
382 struct rtable *rt, *rt2; 382 struct rtable *rt, *rt2;
383 struct flowi4 fl4_dec;
383 int err; 384 int err;
384 385
385 memset(fl4, 0, sizeof(*fl4)); 386 memset(fl4, 0, sizeof(*fl4));
@@ -408,19 +409,19 @@ static struct rtable *icmp_route_lookup(struct net *net,
408 } else 409 } else
409 return rt; 410 return rt;
410 411
411 err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(fl4), AF_INET); 412 err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(&fl4_dec), AF_INET);
412 if (err) 413 if (err)
413 goto relookup_failed; 414 goto relookup_failed;
414 415
415 if (inet_addr_type(net, fl4->saddr) == RTN_LOCAL) { 416 if (inet_addr_type(net, fl4_dec.saddr) == RTN_LOCAL) {
416 rt2 = __ip_route_output_key(net, fl4); 417 rt2 = __ip_route_output_key(net, &fl4_dec);
417 if (IS_ERR(rt2)) 418 if (IS_ERR(rt2))
418 err = PTR_ERR(rt2); 419 err = PTR_ERR(rt2);
419 } else { 420 } else {
420 struct flowi4 fl4_2 = {}; 421 struct flowi4 fl4_2 = {};
421 unsigned long orefdst; 422 unsigned long orefdst;
422 423
423 fl4_2.daddr = fl4->saddr; 424 fl4_2.daddr = fl4_dec.saddr;
424 rt2 = ip_route_output_key(net, &fl4_2); 425 rt2 = ip_route_output_key(net, &fl4_2);
425 if (IS_ERR(rt2)) { 426 if (IS_ERR(rt2)) {
426 err = PTR_ERR(rt2); 427 err = PTR_ERR(rt2);
@@ -428,7 +429,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
428 } 429 }
429 /* Ugh! */ 430 /* Ugh! */
430 orefdst = skb_in->_skb_refdst; /* save old refdst */ 431 orefdst = skb_in->_skb_refdst; /* save old refdst */
431 err = ip_route_input(skb_in, fl4->daddr, fl4->saddr, 432 err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr,
432 RT_TOS(tos), rt2->dst.dev); 433 RT_TOS(tos), rt2->dst.dev);
433 434
434 dst_release(&rt2->dst); 435 dst_release(&rt2->dst);
@@ -440,10 +441,11 @@ static struct rtable *icmp_route_lookup(struct net *net,
440 goto relookup_failed; 441 goto relookup_failed;
441 442
442 rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst, 443 rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst,
443 flowi4_to_flowi(fl4), NULL, 444 flowi4_to_flowi(&fl4_dec), NULL,
444 XFRM_LOOKUP_ICMP); 445 XFRM_LOOKUP_ICMP);
445 if (!IS_ERR(rt2)) { 446 if (!IS_ERR(rt2)) {
446 dst_release(&rt->dst); 447 dst_release(&rt->dst);
448 memcpy(fl4, &fl4_dec, sizeof(*fl4));
447 rt = rt2; 449 rt = rt2;
448 } else if (PTR_ERR(rt2) == -EPERM) { 450 } else if (PTR_ERR(rt2) == -EPERM) {
449 if (rt) 451 if (rt)
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 3267d389843..389a2e6a17f 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -869,7 +869,7 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
869 } 869 }
870 870
871 return netlink_dump_start(idiagnl, skb, nlh, 871 return netlink_dump_start(idiagnl, skb, nlh,
872 inet_diag_dump, NULL); 872 inet_diag_dump, NULL, 0);
873 } 873 }
874 874
875 return inet_diag_get_exact(skb, nlh); 875 return inet_diag_get_exact(skb, nlh);
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
index 85a0f75dae6..ef7ae6049a5 100644
--- a/net/ipv4/inet_lro.c
+++ b/net/ipv4/inet_lro.c
@@ -146,8 +146,7 @@ static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len)
146} 146}
147 147
148static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb, 148static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
149 struct iphdr *iph, struct tcphdr *tcph, 149 struct iphdr *iph, struct tcphdr *tcph)
150 u16 vlan_tag, struct vlan_group *vgrp)
151{ 150{
152 int nr_frags; 151 int nr_frags;
153 __be32 *ptr; 152 __be32 *ptr;
@@ -173,8 +172,6 @@ static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
173 } 172 }
174 173
175 lro_desc->mss = tcp_data_len; 174 lro_desc->mss = tcp_data_len;
176 lro_desc->vgrp = vgrp;
177 lro_desc->vlan_tag = vlan_tag;
178 lro_desc->active = 1; 175 lro_desc->active = 1;
179 176
180 lro_desc->data_csum = lro_tcp_data_csum(iph, tcph, 177 lro_desc->data_csum = lro_tcp_data_csum(iph, tcph,
@@ -309,29 +306,17 @@ static void lro_flush(struct net_lro_mgr *lro_mgr,
309 306
310 skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss; 307 skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss;
311 308
312 if (lro_desc->vgrp) { 309 if (lro_mgr->features & LRO_F_NAPI)
313 if (lro_mgr->features & LRO_F_NAPI) 310 netif_receive_skb(lro_desc->parent);
314 vlan_hwaccel_receive_skb(lro_desc->parent, 311 else
315 lro_desc->vgrp, 312 netif_rx(lro_desc->parent);
316 lro_desc->vlan_tag);
317 else
318 vlan_hwaccel_rx(lro_desc->parent,
319 lro_desc->vgrp,
320 lro_desc->vlan_tag);
321
322 } else {
323 if (lro_mgr->features & LRO_F_NAPI)
324 netif_receive_skb(lro_desc->parent);
325 else
326 netif_rx(lro_desc->parent);
327 }
328 313
329 LRO_INC_STATS(lro_mgr, flushed); 314 LRO_INC_STATS(lro_mgr, flushed);
330 lro_clear_desc(lro_desc); 315 lro_clear_desc(lro_desc);
331} 316}
332 317
333static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb, 318static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
334 struct vlan_group *vgrp, u16 vlan_tag, void *priv) 319 void *priv)
335{ 320{
336 struct net_lro_desc *lro_desc; 321 struct net_lro_desc *lro_desc;
337 struct iphdr *iph; 322 struct iphdr *iph;
@@ -360,7 +345,7 @@ static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
360 goto out; 345 goto out;
361 346
362 skb->ip_summed = lro_mgr->ip_summed_aggr; 347 skb->ip_summed = lro_mgr->ip_summed_aggr;
363 lro_init_desc(lro_desc, skb, iph, tcph, vlan_tag, vgrp); 348 lro_init_desc(lro_desc, skb, iph, tcph);
364 LRO_INC_STATS(lro_mgr, aggregated); 349 LRO_INC_STATS(lro_mgr, aggregated);
365 return 0; 350 return 0;
366 } 351 }
@@ -433,8 +418,7 @@ static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr,
433static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr, 418static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr,
434 struct skb_frag_struct *frags, 419 struct skb_frag_struct *frags,
435 int len, int true_size, 420 int len, int true_size,
436 struct vlan_group *vgrp, 421 void *priv, __wsum sum)
437 u16 vlan_tag, void *priv, __wsum sum)
438{ 422{
439 struct net_lro_desc *lro_desc; 423 struct net_lro_desc *lro_desc;
440 struct iphdr *iph; 424 struct iphdr *iph;
@@ -480,7 +464,7 @@ static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr,
480 tcph = (void *)((u8 *)skb->data + vlan_hdr_len 464 tcph = (void *)((u8 *)skb->data + vlan_hdr_len
481 + IP_HDR_LEN(iph)); 465 + IP_HDR_LEN(iph));
482 466
483 lro_init_desc(lro_desc, skb, iph, tcph, 0, NULL); 467 lro_init_desc(lro_desc, skb, iph, tcph);
484 LRO_INC_STATS(lro_mgr, aggregated); 468 LRO_INC_STATS(lro_mgr, aggregated);
485 return NULL; 469 return NULL;
486 } 470 }
@@ -514,7 +498,7 @@ void lro_receive_skb(struct net_lro_mgr *lro_mgr,
514 struct sk_buff *skb, 498 struct sk_buff *skb,
515 void *priv) 499 void *priv)
516{ 500{
517 if (__lro_proc_skb(lro_mgr, skb, NULL, 0, priv)) { 501 if (__lro_proc_skb(lro_mgr, skb, priv)) {
518 if (lro_mgr->features & LRO_F_NAPI) 502 if (lro_mgr->features & LRO_F_NAPI)
519 netif_receive_skb(skb); 503 netif_receive_skb(skb);
520 else 504 else
@@ -523,29 +507,13 @@ void lro_receive_skb(struct net_lro_mgr *lro_mgr,
523} 507}
524EXPORT_SYMBOL(lro_receive_skb); 508EXPORT_SYMBOL(lro_receive_skb);
525 509
526void lro_vlan_hwaccel_receive_skb(struct net_lro_mgr *lro_mgr,
527 struct sk_buff *skb,
528 struct vlan_group *vgrp,
529 u16 vlan_tag,
530 void *priv)
531{
532 if (__lro_proc_skb(lro_mgr, skb, vgrp, vlan_tag, priv)) {
533 if (lro_mgr->features & LRO_F_NAPI)
534 vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag);
535 else
536 vlan_hwaccel_rx(skb, vgrp, vlan_tag);
537 }
538}
539EXPORT_SYMBOL(lro_vlan_hwaccel_receive_skb);
540
541void lro_receive_frags(struct net_lro_mgr *lro_mgr, 510void lro_receive_frags(struct net_lro_mgr *lro_mgr,
542 struct skb_frag_struct *frags, 511 struct skb_frag_struct *frags,
543 int len, int true_size, void *priv, __wsum sum) 512 int len, int true_size, void *priv, __wsum sum)
544{ 513{
545 struct sk_buff *skb; 514 struct sk_buff *skb;
546 515
547 skb = __lro_proc_segment(lro_mgr, frags, len, true_size, NULL, 0, 516 skb = __lro_proc_segment(lro_mgr, frags, len, true_size, priv, sum);
548 priv, sum);
549 if (!skb) 517 if (!skb)
550 return; 518 return;
551 519
@@ -556,26 +524,6 @@ void lro_receive_frags(struct net_lro_mgr *lro_mgr,
556} 524}
557EXPORT_SYMBOL(lro_receive_frags); 525EXPORT_SYMBOL(lro_receive_frags);
558 526
559void lro_vlan_hwaccel_receive_frags(struct net_lro_mgr *lro_mgr,
560 struct skb_frag_struct *frags,
561 int len, int true_size,
562 struct vlan_group *vgrp,
563 u16 vlan_tag, void *priv, __wsum sum)
564{
565 struct sk_buff *skb;
566
567 skb = __lro_proc_segment(lro_mgr, frags, len, true_size, vgrp,
568 vlan_tag, priv, sum);
569 if (!skb)
570 return;
571
572 if (lro_mgr->features & LRO_F_NAPI)
573 vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag);
574 else
575 vlan_hwaccel_rx(skb, vgrp, vlan_tag);
576}
577EXPORT_SYMBOL(lro_vlan_hwaccel_receive_frags);
578
579void lro_flush_all(struct net_lro_mgr *lro_mgr) 527void lro_flush_all(struct net_lro_mgr *lro_mgr)
580{ 528{
581 int i; 529 int i;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index ce616d92cc5..e38213817d0 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -54,15 +54,11 @@
54 * 1. Nodes may appear in the tree only with the pool lock held. 54 * 1. Nodes may appear in the tree only with the pool lock held.
55 * 2. Nodes may disappear from the tree only with the pool lock held 55 * 2. Nodes may disappear from the tree only with the pool lock held
56 * AND reference count being 0. 56 * AND reference count being 0.
57 * 3. Nodes appears and disappears from unused node list only under 57 * 3. Global variable peer_total is modified under the pool lock.
58 * "inet_peer_unused_lock". 58 * 4. struct inet_peer fields modification:
59 * 4. Global variable peer_total is modified under the pool lock.
60 * 5. struct inet_peer fields modification:
61 * avl_left, avl_right, avl_parent, avl_height: pool lock 59 * avl_left, avl_right, avl_parent, avl_height: pool lock
62 * unused: unused node list lock
63 * refcnt: atomically against modifications on other CPU; 60 * refcnt: atomically against modifications on other CPU;
64 * usually under some other lock to prevent node disappearing 61 * usually under some other lock to prevent node disappearing
65 * dtime: unused node list lock
66 * daddr: unchangeable 62 * daddr: unchangeable
67 * ip_id_count: atomic value (no lock needed) 63 * ip_id_count: atomic value (no lock needed)
68 */ 64 */
@@ -104,19 +100,6 @@ int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries m
104 * aggressively at this stage */ 100 * aggressively at this stage */
105int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */ 101int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */
106int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */ 102int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */
107int inet_peer_gc_mintime __read_mostly = 10 * HZ;
108int inet_peer_gc_maxtime __read_mostly = 120 * HZ;
109
110static struct {
111 struct list_head list;
112 spinlock_t lock;
113} unused_peers = {
114 .list = LIST_HEAD_INIT(unused_peers.list),
115 .lock = __SPIN_LOCK_UNLOCKED(unused_peers.lock),
116};
117
118static void peer_check_expire(unsigned long dummy);
119static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0);
120 103
121 104
122/* Called from ip_output.c:ip_init */ 105/* Called from ip_output.c:ip_init */
@@ -142,21 +125,6 @@ void __init inet_initpeers(void)
142 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, 125 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
143 NULL); 126 NULL);
144 127
145 /* All the timers, started at system startup tend
146 to synchronize. Perturb it a bit.
147 */
148 peer_periodic_timer.expires = jiffies
149 + net_random() % inet_peer_gc_maxtime
150 + inet_peer_gc_maxtime;
151 add_timer(&peer_periodic_timer);
152}
153
154/* Called with or without local BH being disabled. */
155static void unlink_from_unused(struct inet_peer *p)
156{
157 spin_lock_bh(&unused_peers.lock);
158 list_del_init(&p->unused);
159 spin_unlock_bh(&unused_peers.lock);
160} 128}
161 129
162static int addr_compare(const struct inetpeer_addr *a, 130static int addr_compare(const struct inetpeer_addr *a,
@@ -203,20 +171,6 @@ static int addr_compare(const struct inetpeer_addr *a,
203 u; \ 171 u; \
204}) 172})
205 173
206static bool atomic_add_unless_return(atomic_t *ptr, int a, int u, int *newv)
207{
208 int cur, old = atomic_read(ptr);
209
210 while (old != u) {
211 *newv = old + a;
212 cur = atomic_cmpxchg(ptr, old, *newv);
213 if (cur == old)
214 return true;
215 old = cur;
216 }
217 return false;
218}
219
220/* 174/*
221 * Called with rcu_read_lock() 175 * Called with rcu_read_lock()
222 * Because we hold no lock against a writer, its quite possible we fall 176 * Because we hold no lock against a writer, its quite possible we fall
@@ -225,8 +179,7 @@ static bool atomic_add_unless_return(atomic_t *ptr, int a, int u, int *newv)
225 * We exit from this function if number of links exceeds PEER_MAXDEPTH 179 * We exit from this function if number of links exceeds PEER_MAXDEPTH
226 */ 180 */
227static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr, 181static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
228 struct inet_peer_base *base, 182 struct inet_peer_base *base)
229 int *newrefcnt)
230{ 183{
231 struct inet_peer *u = rcu_dereference(base->root); 184 struct inet_peer *u = rcu_dereference(base->root);
232 int count = 0; 185 int count = 0;
@@ -235,11 +188,9 @@ static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
235 int cmp = addr_compare(daddr, &u->daddr); 188 int cmp = addr_compare(daddr, &u->daddr);
236 if (cmp == 0) { 189 if (cmp == 0) {
237 /* Before taking a reference, check if this entry was 190 /* Before taking a reference, check if this entry was
238 * deleted, unlink_from_pool() sets refcnt=-1 to make 191 * deleted (refcnt=-1)
239 * distinction between an unused entry (refcnt=0) and
240 * a freed one.
241 */ 192 */
242 if (!atomic_add_unless_return(&u->refcnt, 1, -1, newrefcnt)) 193 if (!atomic_add_unless(&u->refcnt, 1, -1))
243 u = NULL; 194 u = NULL;
244 return u; 195 return u;
245 } 196 }
@@ -366,137 +317,99 @@ static void inetpeer_free_rcu(struct rcu_head *head)
366 kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu)); 317 kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu));
367} 318}
368 319
369/* May be called with local BH enabled. */
370static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base, 320static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base,
371 struct inet_peer __rcu **stack[PEER_MAXDEPTH]) 321 struct inet_peer __rcu **stack[PEER_MAXDEPTH])
372{ 322{
373 int do_free; 323 struct inet_peer __rcu ***stackptr, ***delp;
374 324
375 do_free = 0; 325 if (lookup(&p->daddr, stack, base) != p)
376 326 BUG();
377 write_seqlock_bh(&base->lock); 327 delp = stackptr - 1; /* *delp[0] == p */
378 /* Check the reference counter. It was artificially incremented by 1 328 if (p->avl_left == peer_avl_empty_rcu) {
379 * in cleanup() function to prevent sudden disappearing. If we can 329 *delp[0] = p->avl_right;
380 * atomically (because of lockless readers) take this last reference, 330 --stackptr;
381 * it's safe to remove the node and free it later. 331 } else {
382 * We use refcnt=-1 to alert lockless readers this entry is deleted. 332 /* look for a node to insert instead of p */
383 */ 333 struct inet_peer *t;
384 if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) { 334 t = lookup_rightempty(p, base);
385 struct inet_peer __rcu ***stackptr, ***delp; 335 BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t);
386 if (lookup(&p->daddr, stack, base) != p) 336 **--stackptr = t->avl_left;
387 BUG(); 337 /* t is removed, t->daddr > x->daddr for any
388 delp = stackptr - 1; /* *delp[0] == p */ 338 * x in p->avl_left subtree.
389 if (p->avl_left == peer_avl_empty_rcu) { 339 * Put t in the old place of p. */
390 *delp[0] = p->avl_right; 340 RCU_INIT_POINTER(*delp[0], t);
391 --stackptr; 341 t->avl_left = p->avl_left;
392 } else { 342 t->avl_right = p->avl_right;
393 /* look for a node to insert instead of p */ 343 t->avl_height = p->avl_height;
394 struct inet_peer *t; 344 BUG_ON(delp[1] != &p->avl_left);
395 t = lookup_rightempty(p, base); 345 delp[1] = &t->avl_left; /* was &p->avl_left */
396 BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t);
397 **--stackptr = t->avl_left;
398 /* t is removed, t->daddr > x->daddr for any
399 * x in p->avl_left subtree.
400 * Put t in the old place of p. */
401 RCU_INIT_POINTER(*delp[0], t);
402 t->avl_left = p->avl_left;
403 t->avl_right = p->avl_right;
404 t->avl_height = p->avl_height;
405 BUG_ON(delp[1] != &p->avl_left);
406 delp[1] = &t->avl_left; /* was &p->avl_left */
407 }
408 peer_avl_rebalance(stack, stackptr, base);
409 base->total--;
410 do_free = 1;
411 } 346 }
412 write_sequnlock_bh(&base->lock); 347 peer_avl_rebalance(stack, stackptr, base);
413 348 base->total--;
414 if (do_free) 349 call_rcu(&p->rcu, inetpeer_free_rcu);
415 call_rcu(&p->rcu, inetpeer_free_rcu);
416 else
417 /* The node is used again. Decrease the reference counter
418 * back. The loop "cleanup -> unlink_from_unused
419 * -> unlink_from_pool -> putpeer -> link_to_unused
420 * -> cleanup (for the same node)"
421 * doesn't really exist because the entry will have a
422 * recent deletion time and will not be cleaned again soon.
423 */
424 inet_putpeer(p);
425} 350}
426 351
427static struct inet_peer_base *family_to_base(int family) 352static struct inet_peer_base *family_to_base(int family)
428{ 353{
429 return (family == AF_INET ? &v4_peers : &v6_peers); 354 return family == AF_INET ? &v4_peers : &v6_peers;
430}
431
432static struct inet_peer_base *peer_to_base(struct inet_peer *p)
433{
434 return family_to_base(p->daddr.family);
435} 355}
436 356
437/* May be called with local BH enabled. */ 357/* perform garbage collect on all items stacked during a lookup */
438static int cleanup_once(unsigned long ttl, struct inet_peer __rcu **stack[PEER_MAXDEPTH]) 358static int inet_peer_gc(struct inet_peer_base *base,
359 struct inet_peer __rcu **stack[PEER_MAXDEPTH],
360 struct inet_peer __rcu ***stackptr)
439{ 361{
440 struct inet_peer *p = NULL; 362 struct inet_peer *p, *gchead = NULL;
441 363 __u32 delta, ttl;
442 /* Remove the first entry from the list of unused nodes. */ 364 int cnt = 0;
443 spin_lock_bh(&unused_peers.lock);
444 if (!list_empty(&unused_peers.list)) {
445 __u32 delta;
446
447 p = list_first_entry(&unused_peers.list, struct inet_peer, unused);
448 delta = (__u32)jiffies - p->dtime;
449 365
450 if (delta < ttl) { 366 if (base->total >= inet_peer_threshold)
451 /* Do not prune fresh entries. */ 367 ttl = 0; /* be aggressive */
452 spin_unlock_bh(&unused_peers.lock); 368 else
453 return -1; 369 ttl = inet_peer_maxttl
370 - (inet_peer_maxttl - inet_peer_minttl) / HZ *
371 base->total / inet_peer_threshold * HZ;
372 stackptr--; /* last stack slot is peer_avl_empty */
373 while (stackptr > stack) {
374 stackptr--;
375 p = rcu_deref_locked(**stackptr, base);
376 if (atomic_read(&p->refcnt) == 0) {
377 smp_rmb();
378 delta = (__u32)jiffies - p->dtime;
379 if (delta >= ttl &&
380 atomic_cmpxchg(&p->refcnt, 0, -1) == 0) {
381 p->gc_next = gchead;
382 gchead = p;
383 }
454 } 384 }
455
456 list_del_init(&p->unused);
457
458 /* Grab an extra reference to prevent node disappearing
459 * before unlink_from_pool() call. */
460 atomic_inc(&p->refcnt);
461 } 385 }
462 spin_unlock_bh(&unused_peers.lock); 386 while ((p = gchead) != NULL) {
463 387 gchead = p->gc_next;
464 if (p == NULL) 388 cnt++;
465 /* It means that the total number of USED entries has 389 unlink_from_pool(p, base, stack);
466 * grown over inet_peer_threshold. It shouldn't really 390 }
467 * happen because of entry limits in route cache. */ 391 return cnt;
468 return -1;
469
470 unlink_from_pool(p, peer_to_base(p), stack);
471 return 0;
472} 392}
473 393
474/* Called with or without local BH being disabled. */ 394struct inet_peer *inet_getpeer(const struct inetpeer_addr *daddr, int create)
475struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
476{ 395{
477 struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; 396 struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
478 struct inet_peer_base *base = family_to_base(daddr->family); 397 struct inet_peer_base *base = family_to_base(daddr->family);
479 struct inet_peer *p; 398 struct inet_peer *p;
480 unsigned int sequence; 399 unsigned int sequence;
481 int invalidated, newrefcnt = 0; 400 int invalidated, gccnt = 0;
482 401
483 /* Look up for the address quickly, lockless. 402 /* Attempt a lockless lookup first.
484 * Because of a concurrent writer, we might not find an existing entry. 403 * Because of a concurrent writer, we might not find an existing entry.
485 */ 404 */
486 rcu_read_lock(); 405 rcu_read_lock();
487 sequence = read_seqbegin(&base->lock); 406 sequence = read_seqbegin(&base->lock);
488 p = lookup_rcu(daddr, base, &newrefcnt); 407 p = lookup_rcu(daddr, base);
489 invalidated = read_seqretry(&base->lock, sequence); 408 invalidated = read_seqretry(&base->lock, sequence);
490 rcu_read_unlock(); 409 rcu_read_unlock();
491 410
492 if (p) { 411 if (p)
493found: /* The existing node has been found.
494 * Remove the entry from unused list if it was there.
495 */
496 if (newrefcnt == 1)
497 unlink_from_unused(p);
498 return p; 412 return p;
499 }
500 413
501 /* If no writer did a change during our lookup, we can return early. */ 414 /* If no writer did a change during our lookup, we can return early. */
502 if (!create && !invalidated) 415 if (!create && !invalidated)
@@ -506,18 +419,27 @@ found: /* The existing node has been found.
506 * At least, nodes should be hot in our cache. 419 * At least, nodes should be hot in our cache.
507 */ 420 */
508 write_seqlock_bh(&base->lock); 421 write_seqlock_bh(&base->lock);
422relookup:
509 p = lookup(daddr, stack, base); 423 p = lookup(daddr, stack, base);
510 if (p != peer_avl_empty) { 424 if (p != peer_avl_empty) {
511 newrefcnt = atomic_inc_return(&p->refcnt); 425 atomic_inc(&p->refcnt);
512 write_sequnlock_bh(&base->lock); 426 write_sequnlock_bh(&base->lock);
513 goto found; 427 return p;
428 }
429 if (!gccnt) {
430 gccnt = inet_peer_gc(base, stack, stackptr);
431 if (gccnt && create)
432 goto relookup;
514 } 433 }
515 p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL; 434 p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
516 if (p) { 435 if (p) {
517 p->daddr = *daddr; 436 p->daddr = *daddr;
518 atomic_set(&p->refcnt, 1); 437 atomic_set(&p->refcnt, 1);
519 atomic_set(&p->rid, 0); 438 atomic_set(&p->rid, 0);
520 atomic_set(&p->ip_id_count, secure_ip_id(daddr->addr.a4)); 439 atomic_set(&p->ip_id_count,
440 (daddr->family == AF_INET) ?
441 secure_ip_id(daddr->addr.a4) :
442 secure_ipv6_id(daddr->addr.a6));
521 p->tcp_ts_stamp = 0; 443 p->tcp_ts_stamp = 0;
522 p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; 444 p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
523 p->rate_tokens = 0; 445 p->rate_tokens = 0;
@@ -525,7 +447,6 @@ found: /* The existing node has been found.
525 p->pmtu_expires = 0; 447 p->pmtu_expires = 0;
526 p->pmtu_orig = 0; 448 p->pmtu_orig = 0;
527 memset(&p->redirect_learned, 0, sizeof(p->redirect_learned)); 449 memset(&p->redirect_learned, 0, sizeof(p->redirect_learned));
528 INIT_LIST_HEAD(&p->unused);
529 450
530 451
531 /* Link the node. */ 452 /* Link the node. */
@@ -534,63 +455,15 @@ found: /* The existing node has been found.
534 } 455 }
535 write_sequnlock_bh(&base->lock); 456 write_sequnlock_bh(&base->lock);
536 457
537 if (base->total >= inet_peer_threshold)
538 /* Remove one less-recently-used entry. */
539 cleanup_once(0, stack);
540
541 return p; 458 return p;
542} 459}
543
544static int compute_total(void)
545{
546 return v4_peers.total + v6_peers.total;
547}
548EXPORT_SYMBOL_GPL(inet_getpeer); 460EXPORT_SYMBOL_GPL(inet_getpeer);
549 461
550/* Called with local BH disabled. */
551static void peer_check_expire(unsigned long dummy)
552{
553 unsigned long now = jiffies;
554 int ttl, total;
555 struct inet_peer __rcu **stack[PEER_MAXDEPTH];
556
557 total = compute_total();
558 if (total >= inet_peer_threshold)
559 ttl = inet_peer_minttl;
560 else
561 ttl = inet_peer_maxttl
562 - (inet_peer_maxttl - inet_peer_minttl) / HZ *
563 total / inet_peer_threshold * HZ;
564 while (!cleanup_once(ttl, stack)) {
565 if (jiffies != now)
566 break;
567 }
568
569 /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime
570 * interval depending on the total number of entries (more entries,
571 * less interval). */
572 total = compute_total();
573 if (total >= inet_peer_threshold)
574 peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime;
575 else
576 peer_periodic_timer.expires = jiffies
577 + inet_peer_gc_maxtime
578 - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ *
579 total / inet_peer_threshold * HZ;
580 add_timer(&peer_periodic_timer);
581}
582
583void inet_putpeer(struct inet_peer *p) 462void inet_putpeer(struct inet_peer *p)
584{ 463{
585 local_bh_disable(); 464 p->dtime = (__u32)jiffies;
586 465 smp_mb__before_atomic_dec();
587 if (atomic_dec_and_lock(&p->refcnt, &unused_peers.lock)) { 466 atomic_dec(&p->refcnt);
588 list_add_tail(&p->unused, &unused_peers.list);
589 p->dtime = (__u32)jiffies;
590 spin_unlock(&unused_peers.lock);
591 }
592
593 local_bh_enable();
594} 467}
595EXPORT_SYMBOL_GPL(inet_putpeer); 468EXPORT_SYMBOL_GPL(inet_putpeer);
596 469
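
The inetpeer.c rework drops the periodic timer and the unused-node list; garbage collection now happens opportunistically against the AVL path walked during a failed lookup, with a TTL that shrinks to zero as the pool approaches inet_peer_threshold. A standalone sketch of that adaptive TTL calculation, reusing the constants visible above (HZ is assumed to be 1000 here purely for illustration):

/* Sketch of the TTL formula in inet_peer_gc(); the defaults mirror the
 * values in this file, HZ=1000 is an assumption for the example. */
#include <stdio.h>

#define HZ 1000
static int inet_peer_threshold = 65536 + 128;
static int inet_peer_minttl = 120 * HZ;		/* 120 sec under pressure */
static int inet_peer_maxttl = 10 * 60 * HZ;	/* 10 min when pool is small */

static unsigned int gc_ttl(int total)
{
	if (total >= inet_peer_threshold)
		return 0;			/* be aggressive */
	return inet_peer_maxttl -
	       (inet_peer_maxttl - inet_peer_minttl) / HZ *
	       total / inet_peer_threshold * HZ;
}

int main(void)
{
	printf("empty pool ttl: %u\n", gc_ttl(0));	/* = inet_peer_maxttl */
	printf("half-full ttl:  %u\n", gc_ttl(32832));	/* between min and max */
	return 0;
}
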
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 0ad6035f636..0e0ab98abc6 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -261,8 +261,9 @@ static void ip_expire(unsigned long arg)
261 * Only an end host needs to send an ICMP 261 * Only an end host needs to send an ICMP
262 * "Fragment Reassembly Timeout" message, per RFC792. 262 * "Fragment Reassembly Timeout" message, per RFC792.
263 */ 263 */
264 if (qp->user == IP_DEFRAG_CONNTRACK_IN && 264 if (qp->user == IP_DEFRAG_AF_PACKET ||
265 skb_rtable(head)->rt_type != RTN_LOCAL) 265 (qp->user == IP_DEFRAG_CONNTRACK_IN &&
266 skb_rtable(head)->rt_type != RTN_LOCAL))
266 goto out_rcu_unlock; 267 goto out_rcu_unlock;
267 268
268 269
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 8871067560d..d7bb94c4834 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -731,9 +731,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
731 } 731 }
732#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 732#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
733 else if (skb->protocol == htons(ETH_P_IPV6)) { 733 else if (skb->protocol == htons(ETH_P_IPV6)) {
734 struct neighbour *neigh = dst_get_neighbour(skb_dst(skb));
734 const struct in6_addr *addr6; 735 const struct in6_addr *addr6;
735 int addr_type; 736 int addr_type;
736 struct neighbour *neigh = skb_dst(skb)->neighbour;
737 737
738 if (neigh == NULL) 738 if (neigh == NULL)
739 goto tx_error; 739 goto tx_error;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index c8f48efc5fd..073a9b01c40 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -165,7 +165,7 @@ int ip_call_ra_chain(struct sk_buff *skb)
165 (!sk->sk_bound_dev_if || 165 (!sk->sk_bound_dev_if ||
166 sk->sk_bound_dev_if == dev->ifindex) && 166 sk->sk_bound_dev_if == dev->ifindex) &&
167 net_eq(sock_net(sk), dev_net(dev))) { 167 net_eq(sock_net(sk), dev_net(dev))) {
168 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { 168 if (ip_is_fragment(ip_hdr(skb))) {
169 if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) 169 if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN))
170 return 1; 170 return 1;
171 } 171 }
@@ -256,7 +256,7 @@ int ip_local_deliver(struct sk_buff *skb)
256 * Reassemble IP fragments. 256 * Reassemble IP fragments.
257 */ 257 */
258 258
259 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { 259 if (ip_is_fragment(ip_hdr(skb))) {
260 if (ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER)) 260 if (ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER))
261 return 0; 261 return 0;
262 } 262 }
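
ip_input.c, and several other files in this series, replace the open-coded frag_off test with ip_is_fragment(). Judging from the expression being replaced, the helper is equivalent to the standalone sketch below; in the kernel it takes a const struct iphdr * and lives in a header, the minimal struct here is only so the example compiles on its own:

/* Standalone sketch of the helper that replaces the open-coded test:
 * a packet is a fragment when the more-fragments bit is set or the
 * fragment offset is non-zero. */
#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>
#include <arpa/inet.h>

#define IP_MF     0x2000	/* "more fragments" flag */
#define IP_OFFSET 0x1fff	/* fragment offset mask */

struct iphdr_min {		/* only the field the test needs */
	uint16_t frag_off;	/* network byte order */
};

static bool ip_is_fragment(const struct iphdr_min *iph)
{
	return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0;
}

int main(void)
{
	struct iphdr_min first = { .frag_off = htons(IP_MF) };	/* MF set */
	struct iphdr_min whole = { .frag_off = 0 };		/* not fragmented */

	printf("%d %d\n", ip_is_fragment(&first), ip_is_fragment(&whole)); /* 1 0 */
	return 0;
}
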
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index a8024eaa0e8..ccaaa851ab4 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -182,6 +182,7 @@ static inline int ip_finish_output2(struct sk_buff *skb)
182 struct rtable *rt = (struct rtable *)dst; 182 struct rtable *rt = (struct rtable *)dst;
183 struct net_device *dev = dst->dev; 183 struct net_device *dev = dst->dev;
184 unsigned int hh_len = LL_RESERVED_SPACE(dev); 184 unsigned int hh_len = LL_RESERVED_SPACE(dev);
185 struct neighbour *neigh;
185 186
186 if (rt->rt_type == RTN_MULTICAST) { 187 if (rt->rt_type == RTN_MULTICAST) {
187 IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len); 188 IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len);
@@ -203,10 +204,9 @@ static inline int ip_finish_output2(struct sk_buff *skb)
203 skb = skb2; 204 skb = skb2;
204 } 205 }
205 206
206 if (dst->hh) 207 neigh = dst_get_neighbour(dst);
207 return neigh_hh_output(dst->hh, skb); 208 if (neigh)
208 else if (dst->neighbour) 209 return neigh_output(neigh, skb);
209 return dst->neighbour->output(skb);
210 210
211 if (net_ratelimit()) 211 if (net_ratelimit())
212 printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n"); 212 printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
@@ -489,7 +489,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
489 489
490 if (first_len - hlen > mtu || 490 if (first_len - hlen > mtu ||
491 ((first_len - hlen) & 7) || 491 ((first_len - hlen) & 7) ||
492 (iph->frag_off & htons(IP_MF|IP_OFFSET)) || 492 ip_is_fragment(iph) ||
493 skb_cloned(skb)) 493 skb_cloned(skb))
494 goto slow_path; 494 goto slow_path;
495 495
@@ -734,7 +734,7 @@ static inline int ip_ufo_append_data(struct sock *sk,
734 int getfrag(void *from, char *to, int offset, int len, 734 int getfrag(void *from, char *to, int offset, int len,
735 int odd, struct sk_buff *skb), 735 int odd, struct sk_buff *skb),
736 void *from, int length, int hh_len, int fragheaderlen, 736 void *from, int length, int hh_len, int fragheaderlen,
737 int transhdrlen, int mtu, unsigned int flags) 737 int transhdrlen, int maxfraglen, unsigned int flags)
738{ 738{
739 struct sk_buff *skb; 739 struct sk_buff *skb;
740 int err; 740 int err;
@@ -767,7 +767,7 @@ static inline int ip_ufo_append_data(struct sock *sk,
767 skb->csum = 0; 767 skb->csum = 0;
768 768
769 /* specify the length of each IP datagram fragment */ 769 /* specify the length of each IP datagram fragment */
770 skb_shinfo(skb)->gso_size = mtu - fragheaderlen; 770 skb_shinfo(skb)->gso_size = maxfraglen - fragheaderlen;
771 skb_shinfo(skb)->gso_type = SKB_GSO_UDP; 771 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
772 __skb_queue_tail(queue, skb); 772 __skb_queue_tail(queue, skb);
773 } 773 }
@@ -802,8 +802,6 @@ static int __ip_append_data(struct sock *sk,
802 skb = skb_peek_tail(queue); 802 skb = skb_peek_tail(queue);
803 803
804 exthdrlen = !skb ? rt->dst.header_len : 0; 804 exthdrlen = !skb ? rt->dst.header_len : 0;
805 length += exthdrlen;
806 transhdrlen += exthdrlen;
807 mtu = cork->fragsize; 805 mtu = cork->fragsize;
808 806
809 hh_len = LL_RESERVED_SPACE(rt->dst.dev); 807 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
@@ -830,10 +828,10 @@ static int __ip_append_data(struct sock *sk,
830 cork->length += length; 828 cork->length += length;
831 if (((length > mtu) || (skb && skb_is_gso(skb))) && 829 if (((length > mtu) || (skb && skb_is_gso(skb))) &&
832 (sk->sk_protocol == IPPROTO_UDP) && 830 (sk->sk_protocol == IPPROTO_UDP) &&
833 (rt->dst.dev->features & NETIF_F_UFO)) { 831 (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len) {
834 err = ip_ufo_append_data(sk, queue, getfrag, from, length, 832 err = ip_ufo_append_data(sk, queue, getfrag, from, length,
835 hh_len, fragheaderlen, transhdrlen, 833 hh_len, fragheaderlen, transhdrlen,
836 mtu, flags); 834 maxfraglen, flags);
837 if (err) 835 if (err)
838 goto error; 836 goto error;
839 return 0; 837 return 0;
@@ -883,17 +881,16 @@ alloc_new_skb:
883 else 881 else
884 alloclen = fraglen; 882 alloclen = fraglen;
885 883
884 alloclen += exthdrlen;
885
886 /* The last fragment gets additional space at tail. 886 /* The last fragment gets additional space at tail.
887 * Note, with MSG_MORE we overallocate on fragments, 887 * Note, with MSG_MORE we overallocate on fragments,
888 * because we have no idea what fragment will be 888 * because we have no idea what fragment will be
889 * the last. 889 * the last.
890 */ 890 */
891 if (datalen == length + fraggap) { 891 if (datalen == length + fraggap)
892 alloclen += rt->dst.trailer_len; 892 alloclen += rt->dst.trailer_len;
893 /* make sure mtu is not reached */ 893
894 if (datalen > mtu - fragheaderlen - rt->dst.trailer_len)
895 datalen -= ALIGN(rt->dst.trailer_len, 8);
896 }
897 if (transhdrlen) { 894 if (transhdrlen) {
898 skb = sock_alloc_send_skb(sk, 895 skb = sock_alloc_send_skb(sk,
899 alloclen + hh_len + 15, 896 alloclen + hh_len + 15,
@@ -926,11 +923,11 @@ alloc_new_skb:
926 /* 923 /*
927 * Find where to start putting bytes. 924 * Find where to start putting bytes.
928 */ 925 */
929 data = skb_put(skb, fraglen); 926 data = skb_put(skb, fraglen + exthdrlen);
930 skb_set_network_header(skb, exthdrlen); 927 skb_set_network_header(skb, exthdrlen);
931 skb->transport_header = (skb->network_header + 928 skb->transport_header = (skb->network_header +
932 fragheaderlen); 929 fragheaderlen);
933 data += fragheaderlen; 930 data += fragheaderlen + exthdrlen;
934 931
935 if (fraggap) { 932 if (fraggap) {
936 skb->csum = skb_copy_and_csum_bits( 933 skb->csum = skb_copy_and_csum_bits(
@@ -1064,7 +1061,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
1064 */ 1061 */
1065 *rtp = NULL; 1062 *rtp = NULL;
1066 cork->fragsize = inet->pmtudisc == IP_PMTUDISC_PROBE ? 1063 cork->fragsize = inet->pmtudisc == IP_PMTUDISC_PROBE ?
1067 rt->dst.dev->mtu : dst_mtu(rt->dst.path); 1064 rt->dst.dev->mtu : dst_mtu(&rt->dst);
1068 cork->dst = &rt->dst; 1065 cork->dst = &rt->dst;
1069 cork->length = 0; 1066 cork->length = 0;
1070 cork->tx_flags = ipc->tx_flags; 1067 cork->tx_flags = ipc->tx_flags;
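
ip_finish_output2() no longer inspects dst->hh and dst->neighbour directly; it fetches the neighbour via dst_get_neighbour() and lets neigh_output() pick between the cached-header fast path and the resolver slow path. Roughly, the dispatch looks like the sketch below (an approximation of the helper in this kernel generation's include/net/neighbour.h, not part of this diff):

/* Sketch: the decision neigh_output() makes on behalf of
 * ip_finish_output2() above. Depends on kernel headers. */
static inline int neigh_output(struct neighbour *n, struct sk_buff *skb)
{
	struct hh_cache *hh = &n->hh;

	if ((n->nud_state & NUD_CONNECTED) && hh->hh_len)
		return neigh_hh_output(hh, skb);	/* cached header fast path */

	return n->output(n, skb);			/* resolve / slow path */
}
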
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index ab7e5542c1c..472a8c4f1dc 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -861,41 +861,44 @@ static void __init ic_do_bootp_ext(u8 *ext)
861#endif 861#endif
862 862
863 switch (*ext++) { 863 switch (*ext++) {
864 case 1: /* Subnet mask */ 864 case 1: /* Subnet mask */
865 if (ic_netmask == NONE) 865 if (ic_netmask == NONE)
866 memcpy(&ic_netmask, ext+1, 4); 866 memcpy(&ic_netmask, ext+1, 4);
867 break; 867 break;
868 case 3: /* Default gateway */ 868 case 3: /* Default gateway */
869 if (ic_gateway == NONE) 869 if (ic_gateway == NONE)
870 memcpy(&ic_gateway, ext+1, 4); 870 memcpy(&ic_gateway, ext+1, 4);
871 break; 871 break;
872 case 6: /* DNS server */ 872 case 6: /* DNS server */
873 servers= *ext/4; 873 servers= *ext/4;
874 if (servers > CONF_NAMESERVERS_MAX) 874 if (servers > CONF_NAMESERVERS_MAX)
875 servers = CONF_NAMESERVERS_MAX; 875 servers = CONF_NAMESERVERS_MAX;
876 for (i = 0; i < servers; i++) { 876 for (i = 0; i < servers; i++) {
877 if (ic_nameservers[i] == NONE) 877 if (ic_nameservers[i] == NONE)
878 memcpy(&ic_nameservers[i], ext+1+4*i, 4); 878 memcpy(&ic_nameservers[i], ext+1+4*i, 4);
879 } 879 }
880 break; 880 break;
881 case 12: /* Host name */ 881 case 12: /* Host name */
882 ic_bootp_string(utsname()->nodename, ext+1, *ext, __NEW_UTS_LEN); 882 ic_bootp_string(utsname()->nodename, ext+1, *ext,
883 ic_host_name_set = 1; 883 __NEW_UTS_LEN);
884 break; 884 ic_host_name_set = 1;
885 case 15: /* Domain name (DNS) */ 885 break;
886 ic_bootp_string(ic_domain, ext+1, *ext, sizeof(ic_domain)); 886 case 15: /* Domain name (DNS) */
887 break; 887 ic_bootp_string(ic_domain, ext+1, *ext, sizeof(ic_domain));
888 case 17: /* Root path */ 888 break;
889 if (!root_server_path[0]) 889 case 17: /* Root path */
890 ic_bootp_string(root_server_path, ext+1, *ext, sizeof(root_server_path)); 890 if (!root_server_path[0])
891 break; 891 ic_bootp_string(root_server_path, ext+1, *ext,
892 case 26: /* Interface MTU */ 892 sizeof(root_server_path));
893 memcpy(&mtu, ext+1, sizeof(mtu)); 893 break;
894 ic_dev_mtu = ntohs(mtu); 894 case 26: /* Interface MTU */
895 break; 895 memcpy(&mtu, ext+1, sizeof(mtu));
896 case 40: /* NIS Domain name (_not_ DNS) */ 896 ic_dev_mtu = ntohs(mtu);
897 ic_bootp_string(utsname()->domainname, ext+1, *ext, __NEW_UTS_LEN); 897 break;
898 break; 898 case 40: /* NIS Domain name (_not_ DNS) */
899 ic_bootp_string(utsname()->domainname, ext+1, *ext,
900 __NEW_UTS_LEN);
901 break;
899 } 902 }
900} 903}
901 904
@@ -932,7 +935,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
932 goto drop; 935 goto drop;
933 936
934 /* Fragments are not supported */ 937 /* Fragments are not supported */
935 if (h->frag_off & htons(IP_OFFSET | IP_MF)) { 938 if (ip_is_fragment(h)) {
936 if (net_ratelimit()) 939 if (net_ratelimit())
937 printk(KERN_ERR "DHCP/BOOTP: Ignoring fragmented " 940 printk(KERN_ERR "DHCP/BOOTP: Ignoring fragmented "
938 "reply.\n"); 941 "reply.\n");
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 30a7763c400..aae2bd8cd92 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -2544,7 +2544,8 @@ int __init ip_mr_init(void)
2544 goto add_proto_fail; 2544 goto add_proto_fail;
2545 } 2545 }
2546#endif 2546#endif
2547 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute); 2547 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE,
2548 NULL, ipmr_rtm_dumproute, NULL);
2548 return 0; 2549 return 0;
2549 2550
2550#ifdef CONFIG_IP_PIMSM_V2 2551#ifdef CONFIG_IP_PIMSM_V2
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 4614babdc45..2e97e3ec1eb 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -17,51 +17,35 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
17 const struct iphdr *iph = ip_hdr(skb); 17 const struct iphdr *iph = ip_hdr(skb);
18 struct rtable *rt; 18 struct rtable *rt;
19 struct flowi4 fl4 = {}; 19 struct flowi4 fl4 = {};
20 unsigned long orefdst; 20 __be32 saddr = iph->saddr;
21 __u8 flags = 0;
21 unsigned int hh_len; 22 unsigned int hh_len;
22 unsigned int type;
23 23
24 type = inet_addr_type(net, iph->saddr); 24 if (!skb->sk && addr_type != RTN_LOCAL) {
25 if (skb->sk && inet_sk(skb->sk)->transparent) 25 if (addr_type == RTN_UNSPEC)
26 type = RTN_LOCAL; 26 addr_type = inet_addr_type(net, saddr);
27 if (addr_type == RTN_UNSPEC) 27 if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST)
28 addr_type = type; 28 flags |= FLOWI_FLAG_ANYSRC;
29 else
30 saddr = 0;
31 }
29 32
30 /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause 33 /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
31 * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook. 34 * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook.
32 */ 35 */
33 if (addr_type == RTN_LOCAL) { 36 fl4.daddr = iph->daddr;
34 fl4.daddr = iph->daddr; 37 fl4.saddr = saddr;
35 if (type == RTN_LOCAL) 38 fl4.flowi4_tos = RT_TOS(iph->tos);
36 fl4.saddr = iph->saddr; 39 fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
37 fl4.flowi4_tos = RT_TOS(iph->tos); 40 fl4.flowi4_mark = skb->mark;
38 fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; 41 fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : flags;
39 fl4.flowi4_mark = skb->mark; 42 rt = ip_route_output_key(net, &fl4);
40 fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; 43 if (IS_ERR(rt))
41 rt = ip_route_output_key(net, &fl4); 44 return -1;
42 if (IS_ERR(rt))
43 return -1;
44
45 /* Drop old route. */
46 skb_dst_drop(skb);
47 skb_dst_set(skb, &rt->dst);
48 } else {
49 /* non-local src, find valid iif to satisfy
50 * rp-filter when calling ip_route_input. */
51 fl4.daddr = iph->saddr;
52 rt = ip_route_output_key(net, &fl4);
53 if (IS_ERR(rt))
54 return -1;
55 45
56 orefdst = skb->_skb_refdst; 46 /* Drop old route. */
57 if (ip_route_input(skb, iph->daddr, iph->saddr, 47 skb_dst_drop(skb);
58 RT_TOS(iph->tos), rt->dst.dev) != 0) { 48 skb_dst_set(skb, &rt->dst);
59 dst_release(&rt->dst);
60 return -1;
61 }
62 dst_release(&rt->dst);
63 refdst_drop(orefdst);
64 }
65 49
66 if (skb_dst(skb)->error) 50 if (skb_dst(skb)->error)
67 return -1; 51 return -1;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 5c9e97c7901..db8d22db425 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -317,19 +317,19 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
317 hash = clusterip_hashfn(skb, cipinfo->config); 317 hash = clusterip_hashfn(skb, cipinfo->config);
318 318
319 switch (ctinfo) { 319 switch (ctinfo) {
320 case IP_CT_NEW: 320 case IP_CT_NEW:
321 ct->mark = hash; 321 ct->mark = hash;
322 break; 322 break;
323 case IP_CT_RELATED: 323 case IP_CT_RELATED:
324 case IP_CT_RELATED_REPLY: 324 case IP_CT_RELATED_REPLY:
325 /* FIXME: we don't handle expectations at the 325 /* FIXME: we don't handle expectations at the moment.
326 * moment. they can arrive on a different node than 326 * They can arrive on a different node than
327 * the master connection (e.g. FTP passive mode) */ 327 * the master connection (e.g. FTP passive mode) */
328 case IP_CT_ESTABLISHED: 328 case IP_CT_ESTABLISHED:
329 case IP_CT_ESTABLISHED_REPLY: 329 case IP_CT_ESTABLISHED_REPLY:
330 break; 330 break;
331 default: 331 default: /* Prevent gcc warnings */
332 break; 332 break;
333 } 333 }
334 334
335#ifdef DEBUG 335#ifdef DEBUG
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 1ff79e557f9..51f13f8ec72 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -40,7 +40,6 @@ static void send_reset(struct sk_buff *oldskb, int hook)
40 struct iphdr *niph; 40 struct iphdr *niph;
41 const struct tcphdr *oth; 41 const struct tcphdr *oth;
42 struct tcphdr _otcph, *tcph; 42 struct tcphdr _otcph, *tcph;
43 unsigned int addr_type;
44 43
45 /* IP header checks: fragment. */ 44 /* IP header checks: fragment. */
46 if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) 45 if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
@@ -55,6 +54,9 @@ static void send_reset(struct sk_buff *oldskb, int hook)
55 if (oth->rst) 54 if (oth->rst)
56 return; 55 return;
57 56
57 if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
58 return;
59
58 /* Check checksum */ 60 /* Check checksum */
59 if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP)) 61 if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP))
60 return; 62 return;
@@ -101,19 +103,11 @@ static void send_reset(struct sk_buff *oldskb, int hook)
101 nskb->csum_start = (unsigned char *)tcph - nskb->head; 103 nskb->csum_start = (unsigned char *)tcph - nskb->head;
102 nskb->csum_offset = offsetof(struct tcphdr, check); 104 nskb->csum_offset = offsetof(struct tcphdr, check);
103 105
104 addr_type = RTN_UNSPEC;
105 if (hook != NF_INET_FORWARD
106#ifdef CONFIG_BRIDGE_NETFILTER
107 || (nskb->nf_bridge && nskb->nf_bridge->mask & BRNF_BRIDGED)
108#endif
109 )
110 addr_type = RTN_LOCAL;
111
112 /* ip_route_me_harder expects skb->dst to be set */ 106 /* ip_route_me_harder expects skb->dst to be set */
113 skb_dst_set_noref(nskb, skb_dst(oldskb)); 107 skb_dst_set_noref(nskb, skb_dst(oldskb));
114 108
115 nskb->protocol = htons(ETH_P_IP); 109 nskb->protocol = htons(ETH_P_IP);
116 if (ip_route_me_harder(nskb, addr_type)) 110 if (ip_route_me_harder(nskb, RTN_UNSPEC))
117 goto free_nskb; 111 goto free_nskb;
118 112
119 niph->ttl = ip4_dst_hoplimit(skb_dst(nskb)); 113 niph->ttl = ip4_dst_hoplimit(skb_dst(nskb));
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index f3a9b42b16c..9bb1b8a37a2 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -82,7 +82,7 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
82#endif 82#endif
83#endif 83#endif
84 /* Gather fragments. */ 84 /* Gather fragments. */
85 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { 85 if (ip_is_fragment(ip_hdr(skb))) {
86 enum ip_defrag_users user = nf_ct_defrag_user(hooknum, skb); 86 enum ip_defrag_users user = nf_ct_defrag_user(hooknum, skb);
87 if (nf_ct_ipv4_gather_frags(skb, user)) 87 if (nf_ct_ipv4_gather_frags(skb, user))
88 return NF_STOLEN; 88 return NF_STOLEN;
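
Several hunks in this series (here, nf_nat_standalone.c below, and xfrm4_policy.c at the end) swap the open-coded frag_off test for the new ip_is_fragment() helper. A self-contained sketch of the equivalent check, assuming only the standard meaning of the IP_MF and IP_OFFSET bits:

    /* Sketch: the check that ip_is_fragment() wraps. A datagram is a
     * fragment if either the more-fragments bit or a non-zero fragment
     * offset is present; both live in the same 16-bit frag_off field,
     * stored in network byte order. */
    #include <stdio.h>
    #include <stdint.h>
    #include <arpa/inet.h>   /* htons() */

    #define IP_OFFSET 0x1FFF /* fragment offset mask */
    #define IP_MF     0x2000 /* more-fragments flag  */

    struct ipv4hdr { uint16_t frag_off; /* other header fields omitted */ };

    static int is_fragment(const struct ipv4hdr *iph)
    {
        return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0;
    }

    int main(void)
    {
        struct ipv4hdr first = { htons(IP_MF) };  /* first fragment of a chain */
        struct ipv4hdr whole = { htons(0x4000) }; /* DF only: not a fragment   */
        printf("first: %d, whole: %d\n", is_fragment(&first), is_fragment(&whole));
        return 0;
    }
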
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 8812a02078a..076b7c8c4aa 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -719,117 +719,115 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
719 719
720 l = 0; 720 l = 0;
721 switch (type) { 721 switch (type) {
722 case SNMP_INTEGER: 722 case SNMP_INTEGER:
723 len = sizeof(long); 723 len = sizeof(long);
724 if (!asn1_long_decode(ctx, end, &l)) { 724 if (!asn1_long_decode(ctx, end, &l)) {
725 kfree(id); 725 kfree(id);
726 return 0; 726 return 0;
727 } 727 }
728 *obj = kmalloc(sizeof(struct snmp_object) + len, 728 *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
729 GFP_ATOMIC); 729 if (*obj == NULL) {
730 if (*obj == NULL) { 730 kfree(id);
731 kfree(id); 731 if (net_ratelimit())
732 if (net_ratelimit()) 732 pr_notice("OOM in bsalg (%d)\n", __LINE__);
733 pr_notice("OOM in bsalg (%d)\n", __LINE__); 733 return 0;
734 return 0; 734 }
735 } 735 (*obj)->syntax.l[0] = l;
736 (*obj)->syntax.l[0] = l; 736 break;
737 break; 737 case SNMP_OCTETSTR:
738 case SNMP_OCTETSTR: 738 case SNMP_OPAQUE:
739 case SNMP_OPAQUE: 739 if (!asn1_octets_decode(ctx, end, &p, &len)) {
740 if (!asn1_octets_decode(ctx, end, &p, &len)) { 740 kfree(id);
741 kfree(id); 741 return 0;
742 return 0; 742 }
743 } 743 *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
744 *obj = kmalloc(sizeof(struct snmp_object) + len, 744 if (*obj == NULL) {
745 GFP_ATOMIC);
746 if (*obj == NULL) {
747 kfree(p);
748 kfree(id);
749 if (net_ratelimit())
750 pr_notice("OOM in bsalg (%d)\n", __LINE__);
751 return 0;
752 }
753 memcpy((*obj)->syntax.c, p, len);
754 kfree(p); 745 kfree(p);
755 break; 746 kfree(id);
756 case SNMP_NULL: 747 if (net_ratelimit())
757 case SNMP_NOSUCHOBJECT: 748 pr_notice("OOM in bsalg (%d)\n", __LINE__);
758 case SNMP_NOSUCHINSTANCE: 749 return 0;
759 case SNMP_ENDOFMIBVIEW: 750 }
760 len = 0; 751 memcpy((*obj)->syntax.c, p, len);
761 *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC); 752 kfree(p);
762 if (*obj == NULL) { 753 break;
763 kfree(id); 754 case SNMP_NULL:
764 if (net_ratelimit()) 755 case SNMP_NOSUCHOBJECT:
765 pr_notice("OOM in bsalg (%d)\n", __LINE__); 756 case SNMP_NOSUCHINSTANCE:
766 return 0; 757 case SNMP_ENDOFMIBVIEW:
767 } 758 len = 0;
768 if (!asn1_null_decode(ctx, end)) { 759 *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
769 kfree(id); 760 if (*obj == NULL) {
770 kfree(*obj); 761 kfree(id);
771 *obj = NULL; 762 if (net_ratelimit())
772 return 0; 763 pr_notice("OOM in bsalg (%d)\n", __LINE__);
773 } 764 return 0;
774 break; 765 }
775 case SNMP_OBJECTID: 766 if (!asn1_null_decode(ctx, end)) {
776 if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) { 767 kfree(id);
777 kfree(id); 768 kfree(*obj);
778 return 0; 769 *obj = NULL;
779 } 770 return 0;
780 len *= sizeof(unsigned long); 771 }
781 *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); 772 break;
782 if (*obj == NULL) { 773 case SNMP_OBJECTID:
783 kfree(lp); 774 if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) {
784 kfree(id); 775 kfree(id);
785 if (net_ratelimit()) 776 return 0;
786 pr_notice("OOM in bsalg (%d)\n", __LINE__); 777 }
787 return 0; 778 len *= sizeof(unsigned long);
788 } 779 *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
789 memcpy((*obj)->syntax.ul, lp, len); 780 if (*obj == NULL) {
790 kfree(lp); 781 kfree(lp);
791 break; 782 kfree(id);
792 case SNMP_IPADDR: 783 if (net_ratelimit())
793 if (!asn1_octets_decode(ctx, end, &p, &len)) { 784 pr_notice("OOM in bsalg (%d)\n", __LINE__);
794 kfree(id); 785 return 0;
795 return 0; 786 }
796 } 787 memcpy((*obj)->syntax.ul, lp, len);
797 if (len != 4) { 788 kfree(lp);
798 kfree(p); 789 break;
799 kfree(id); 790 case SNMP_IPADDR:
800 return 0; 791 if (!asn1_octets_decode(ctx, end, &p, &len)) {
801 } 792 kfree(id);
802 *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); 793 return 0;
803 if (*obj == NULL) { 794 }
804 kfree(p); 795 if (len != 4) {
805 kfree(id);
806 if (net_ratelimit())
807 pr_notice("OOM in bsalg (%d)\n", __LINE__);
808 return 0;
809 }
810 memcpy((*obj)->syntax.uc, p, len);
811 kfree(p); 796 kfree(p);
812 break;
813 case SNMP_COUNTER:
814 case SNMP_GAUGE:
815 case SNMP_TIMETICKS:
816 len = sizeof(unsigned long);
817 if (!asn1_ulong_decode(ctx, end, &ul)) {
818 kfree(id);
819 return 0;
820 }
821 *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
822 if (*obj == NULL) {
823 kfree(id);
824 if (net_ratelimit())
825 pr_notice("OOM in bsalg (%d)\n", __LINE__);
826 return 0;
827 }
828 (*obj)->syntax.ul[0] = ul;
829 break;
830 default:
831 kfree(id); 797 kfree(id);
832 return 0; 798 return 0;
799 }
800 *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
801 if (*obj == NULL) {
802 kfree(p);
803 kfree(id);
804 if (net_ratelimit())
805 pr_notice("OOM in bsalg (%d)\n", __LINE__);
806 return 0;
807 }
808 memcpy((*obj)->syntax.uc, p, len);
809 kfree(p);
810 break;
811 case SNMP_COUNTER:
812 case SNMP_GAUGE:
813 case SNMP_TIMETICKS:
814 len = sizeof(unsigned long);
815 if (!asn1_ulong_decode(ctx, end, &ul)) {
816 kfree(id);
817 return 0;
818 }
819 *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
820 if (*obj == NULL) {
821 kfree(id);
822 if (net_ratelimit())
823 pr_notice("OOM in bsalg (%d)\n", __LINE__);
824 return 0;
825 }
826 (*obj)->syntax.ul[0] = ul;
827 break;
828 default:
829 kfree(id);
830 return 0;
833 } 831 }
834 832
835 (*obj)->syntax_len = len; 833 (*obj)->syntax_len = len;
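
The nf_nat_snmp_basic.c hunk only re-indents the switch, but every case it touches repeats the same allocation idiom: one kmalloc() of sizeof(struct snmp_object) + len with the decoded payload copied into the trailing storage, and the temporary id/p buffers freed on each error path. A hedged userspace analogue of that single-allocation pattern (struct and function names here are illustrative, not the kernel's):

    /* Sketch of the "header plus variable payload in one allocation"
     * pattern used by snmp_object_decode(); malloc stands in for
     * kmalloc(..., GFP_ATOMIC). */
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct blob {
        size_t len;
        unsigned char data[];   /* flexible array member */
    };

    static struct blob *blob_new(const void *payload, size_t len)
    {
        struct blob *b = malloc(sizeof(*b) + len);
        if (!b)
            return NULL;        /* caller frees whatever it still owns */
        b->len = len;
        memcpy(b->data, payload, len);
        return b;
    }

    int main(void)
    {
        struct blob *b = blob_new("public", 6);
        if (!b)
            return 1;
        printf("len=%zu first=%c\n", b->len, b->data[0]);
        free(b);
        return 0;
    }
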
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 483b76d042d..a6e606e8482 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -88,7 +88,7 @@ nf_nat_fn(unsigned int hooknum,
88 88
89 /* We never see fragments: conntrack defrags on pre-routing 89 /* We never see fragments: conntrack defrags on pre-routing
90 and local-out, and nf_nat_out protects post-routing. */ 90 and local-out, and nf_nat_out protects post-routing. */
91 NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))); 91 NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));
92 92
93 ct = nf_ct_get(skb, &ctinfo); 93 ct = nf_ct_get(skb, &ctinfo);
94 /* Can't track? It's not due to stress, or conntrack would 94 /* Can't track? It's not due to stress, or conntrack would
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index c9893d43242..08526786dc3 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -825,28 +825,28 @@ static int compat_raw_getsockopt(struct sock *sk, int level, int optname,
825static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg) 825static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg)
826{ 826{
827 switch (cmd) { 827 switch (cmd) {
828 case SIOCOUTQ: { 828 case SIOCOUTQ: {
829 int amount = sk_wmem_alloc_get(sk); 829 int amount = sk_wmem_alloc_get(sk);
830 830
831 return put_user(amount, (int __user *)arg); 831 return put_user(amount, (int __user *)arg);
832 } 832 }
833 case SIOCINQ: { 833 case SIOCINQ: {
834 struct sk_buff *skb; 834 struct sk_buff *skb;
835 int amount = 0; 835 int amount = 0;
836 836
837 spin_lock_bh(&sk->sk_receive_queue.lock); 837 spin_lock_bh(&sk->sk_receive_queue.lock);
838 skb = skb_peek(&sk->sk_receive_queue); 838 skb = skb_peek(&sk->sk_receive_queue);
839 if (skb != NULL) 839 if (skb != NULL)
840 amount = skb->len; 840 amount = skb->len;
841 spin_unlock_bh(&sk->sk_receive_queue.lock); 841 spin_unlock_bh(&sk->sk_receive_queue.lock);
842 return put_user(amount, (int __user *)arg); 842 return put_user(amount, (int __user *)arg);
843 } 843 }
844 844
845 default: 845 default:
846#ifdef CONFIG_IP_MROUTE 846#ifdef CONFIG_IP_MROUTE
847 return ipmr_ioctl(sk, cmd, (void __user *)arg); 847 return ipmr_ioctl(sk, cmd, (void __user *)arg);
848#else 848#else
849 return -ENOIOCTLCMD; 849 return -ENOIOCTLCMD;
850#endif 850#endif
851 } 851 }
852} 852}
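
raw_ioctl() is likewise only re-indented, but the two cases document the queue-introspection interface being preserved: SIOCOUTQ returns the bytes still sitting in the send queue, SIOCINQ the length of the next queued datagram. A small usage sketch from userspace; a UDP socket is used so it runs unprivileged, and raw sockets answer the same ioctls as the hunk shows:

    /* Sketch: querying pending queue sizes with SIOCINQ/SIOCOUTQ. */
    #include <stdio.h>
    #include <sys/socket.h>
    #include <sys/ioctl.h>
    #include <netinet/in.h>
    #include <linux/sockios.h>   /* SIOCINQ, SIOCOUTQ */
    #include <unistd.h>

    int main(void)
    {
        int fd = socket(AF_INET, SOCK_DGRAM, 0);
        struct sockaddr_in sa = { .sin_family = AF_INET,
                                  .sin_addr.s_addr = htonl(INADDR_LOOPBACK) };
        socklen_t len = sizeof(sa);
        int inq = 0, outq = 0;

        bind(fd, (struct sockaddr *)&sa, sizeof(sa));      /* ephemeral port */
        getsockname(fd, (struct sockaddr *)&sa, &len);
        sendto(fd, "ping", 4, 0, (struct sockaddr *)&sa, sizeof(sa)); /* to self */
        usleep(10000);           /* give loopback delivery a moment (sketch only) */

        ioctl(fd, SIOCINQ, &inq);    /* length of the next queued datagram */
        ioctl(fd, SIOCOUTQ, &outq);  /* bytes not yet pushed out */
        printf("SIOCINQ=%d SIOCOUTQ=%d\n", inq, outq);
        close(fd);
        return 0;
    }
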
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index aa13ef10511..33137307d52 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -108,6 +108,7 @@
108#ifdef CONFIG_SYSCTL 108#ifdef CONFIG_SYSCTL
109#include <linux/sysctl.h> 109#include <linux/sysctl.h>
110#endif 110#endif
111#include <net/atmclip.h>
111 112
112#define RT_FL_TOS(oldflp4) \ 113#define RT_FL_TOS(oldflp4) \
113 ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) 114 ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)))
@@ -184,6 +185,8 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
184 return p; 185 return p;
185} 186}
186 187
188static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr);
189
187static struct dst_ops ipv4_dst_ops = { 190static struct dst_ops ipv4_dst_ops = {
188 .family = AF_INET, 191 .family = AF_INET,
189 .protocol = cpu_to_be16(ETH_P_IP), 192 .protocol = cpu_to_be16(ETH_P_IP),
@@ -198,6 +201,7 @@ static struct dst_ops ipv4_dst_ops = {
198 .link_failure = ipv4_link_failure, 201 .link_failure = ipv4_link_failure,
199 .update_pmtu = ip_rt_update_pmtu, 202 .update_pmtu = ip_rt_update_pmtu,
200 .local_out = __ip_local_out, 203 .local_out = __ip_local_out,
204 .neigh_lookup = ipv4_neigh_lookup,
201}; 205};
202 206
203#define ECN_OR_COST(class) TC_PRIO_##class 207#define ECN_OR_COST(class) TC_PRIO_##class
@@ -411,8 +415,10 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
411 "HHUptod\tSpecDst"); 415 "HHUptod\tSpecDst");
412 else { 416 else {
413 struct rtable *r = v; 417 struct rtable *r = v;
418 struct neighbour *n;
414 int len; 419 int len;
415 420
421 n = dst_get_neighbour(&r->dst);
416 seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" 422 seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t"
417 "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", 423 "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n",
418 r->dst.dev ? r->dst.dev->name : "*", 424 r->dst.dev ? r->dst.dev->name : "*",
@@ -425,9 +431,8 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
425 (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + 431 (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
426 dst_metric(&r->dst, RTAX_RTTVAR)), 432 dst_metric(&r->dst, RTAX_RTTVAR)),
427 r->rt_key_tos, 433 r->rt_key_tos,
428 r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1, 434 -1,
429 r->dst.hh ? (r->dst.hh->hh_output == 435 (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0,
430 dev_queue_xmit) : 0,
431 r->rt_spec_dst, &len); 436 r->rt_spec_dst, &len);
432 437
433 seq_printf(seq, "%*s\n", 127 - len, ""); 438 seq_printf(seq, "%*s\n", 127 - len, "");
@@ -1006,6 +1011,37 @@ static int slow_chain_length(const struct rtable *head)
1006 return length >> FRACT_BITS; 1011 return length >> FRACT_BITS;
1007} 1012}
1008 1013
1014static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr)
1015{
1016 struct neigh_table *tbl = &arp_tbl;
1017 static const __be32 inaddr_any = 0;
1018 struct net_device *dev = dst->dev;
1019 const __be32 *pkey = daddr;
1020 struct neighbour *n;
1021
1022#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
1023 if (dev->type == ARPHRD_ATM)
1024 tbl = clip_tbl_hook;
1025#endif
1026 if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
1027 pkey = &inaddr_any;
1028
1029 n = __ipv4_neigh_lookup(tbl, dev, *(__force u32 *)pkey);
1030 if (n)
1031 return n;
1032 return neigh_create(tbl, pkey, dev);
1033}
1034
1035static int rt_bind_neighbour(struct rtable *rt)
1036{
1037 struct neighbour *n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway);
1038 if (IS_ERR(n))
1039 return PTR_ERR(n);
1040 dst_set_neighbour(&rt->dst, n);
1041
1042 return 0;
1043}
1044
1009static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt, 1045static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt,
1010 struct sk_buff *skb, int ifindex) 1046 struct sk_buff *skb, int ifindex)
1011{ 1047{
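
The new ipv4_neigh_lookup() backs the neigh_lookup dst_ops method added by this series: pick the ARP table (or the ATM clip table when the device is ARPHRD_ATM), collapse all destinations to 0.0.0.0 on loopback/point-to-point devices, try __ipv4_neigh_lookup(), and fall back to neigh_create() on a miss; rt_bind_neighbour() then stores the result with dst_set_neighbour() in place of the old arp_bind_neighbour() call. A toy userspace model of that lookup-or-create contract (the fixed-size table and types below are stand-ins, not kernel structures):

    /* Sketch: the lookup-or-create contract behind ipv4_neigh_lookup().
     * A tiny array plays the role of the neighbour hash table; only the
     * control flow is the point. */
    #include <stdio.h>
    #include <stdint.h>

    struct nbr { uint32_t addr; int used; };
    static struct nbr table[16];

    static struct nbr *neigh_lookup_or_create(uint32_t addr, int ptp_dev)
    {
        size_t i;

        if (ptp_dev)             /* loopback/p2p: one shared entry for 0.0.0.0 */
            addr = 0;
        for (i = 0; i < 16; i++)                 /* lookup */
            if (table[i].used && table[i].addr == addr)
                return &table[i];
        for (i = 0; i < 16; i++)                 /* create on miss */
            if (!table[i].used) {
                table[i].used = 1;
                table[i].addr = addr;
                return &table[i];
            }
        return NULL;                             /* table full */
    }

    int main(void)
    {
        struct nbr *a = neigh_lookup_or_create(0x0a000001, 0);
        struct nbr *b = neigh_lookup_or_create(0x0a000001, 0);
        printf("same entry: %d\n", a == b);      /* second call hits the cache */
        return 0;
    }
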
@@ -1042,7 +1078,7 @@ restart:
1042 1078
1043 rt->dst.flags |= DST_NOCACHE; 1079 rt->dst.flags |= DST_NOCACHE;
1044 if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) { 1080 if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) {
1045 int err = arp_bind_neighbour(&rt->dst); 1081 int err = rt_bind_neighbour(rt);
1046 if (err) { 1082 if (err) {
1047 if (net_ratelimit()) 1083 if (net_ratelimit())
1048 printk(KERN_WARNING 1084 printk(KERN_WARNING
@@ -1138,7 +1174,7 @@ restart:
1138 route or unicast forwarding path. 1174 route or unicast forwarding path.
1139 */ 1175 */
1140 if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) { 1176 if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) {
1141 int err = arp_bind_neighbour(&rt->dst); 1177 int err = rt_bind_neighbour(rt);
1142 if (err) { 1178 if (err) {
1143 spin_unlock_bh(rt_hash_lock_addr(hash)); 1179 spin_unlock_bh(rt_hash_lock_addr(hash));
1144 1180
@@ -1439,20 +1475,20 @@ static int ip_error(struct sk_buff *skb)
1439 int code; 1475 int code;
1440 1476
1441 switch (rt->dst.error) { 1477 switch (rt->dst.error) {
1442 case EINVAL: 1478 case EINVAL:
1443 default: 1479 default:
1444 goto out; 1480 goto out;
1445 case EHOSTUNREACH: 1481 case EHOSTUNREACH:
1446 code = ICMP_HOST_UNREACH; 1482 code = ICMP_HOST_UNREACH;
1447 break; 1483 break;
1448 case ENETUNREACH: 1484 case ENETUNREACH:
1449 code = ICMP_NET_UNREACH; 1485 code = ICMP_NET_UNREACH;
1450 IP_INC_STATS_BH(dev_net(rt->dst.dev), 1486 IP_INC_STATS_BH(dev_net(rt->dst.dev),
1451 IPSTATS_MIB_INNOROUTES); 1487 IPSTATS_MIB_INNOROUTES);
1452 break; 1488 break;
1453 case EACCES: 1489 case EACCES:
1454 code = ICMP_PKT_FILTERED; 1490 code = ICMP_PKT_FILTERED;
1455 break; 1491 break;
1456 } 1492 }
1457 1493
1458 if (!rt->peer) 1494 if (!rt->peer)
@@ -1592,23 +1628,24 @@ static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
1592{ 1628{
1593 struct rtable *rt = (struct rtable *) dst; 1629 struct rtable *rt = (struct rtable *) dst;
1594 __be32 orig_gw = rt->rt_gateway; 1630 __be32 orig_gw = rt->rt_gateway;
1631 struct neighbour *n;
1595 1632
1596 dst_confirm(&rt->dst); 1633 dst_confirm(&rt->dst);
1597 1634
1598 neigh_release(rt->dst.neighbour); 1635 neigh_release(dst_get_neighbour(&rt->dst));
1599 rt->dst.neighbour = NULL; 1636 dst_set_neighbour(&rt->dst, NULL);
1600 1637
1601 rt->rt_gateway = peer->redirect_learned.a4; 1638 rt->rt_gateway = peer->redirect_learned.a4;
1602 if (arp_bind_neighbour(&rt->dst) || 1639 rt_bind_neighbour(rt);
1603 !(rt->dst.neighbour->nud_state & NUD_VALID)) { 1640 n = dst_get_neighbour(&rt->dst);
1604 if (rt->dst.neighbour) 1641 if (!n || !(n->nud_state & NUD_VALID)) {
1605 neigh_event_send(rt->dst.neighbour, NULL); 1642 if (n)
1643 neigh_event_send(n, NULL);
1606 rt->rt_gateway = orig_gw; 1644 rt->rt_gateway = orig_gw;
1607 return -EAGAIN; 1645 return -EAGAIN;
1608 } else { 1646 } else {
1609 rt->rt_flags |= RTCF_REDIRECTED; 1647 rt->rt_flags |= RTCF_REDIRECTED;
1610 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, 1648 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
1611 rt->dst.neighbour);
1612 } 1649 }
1613 return 0; 1650 return 0;
1614} 1651}
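
check_peer_redir() now reaches the cached neighbour only through dst_get_neighbour()/dst_set_neighbour() rather than dereferencing rt->dst.neighbour directly, presumably so the storage behind the pointer can change later without touching every caller again. A trivial sketch of that accessor discipline in plain C (field and function names are illustrative, not the kernel's):

    /* Sketch: accessors around a cached pointer so its representation
     * and locking can change without editing every call site. */
    #include <stdio.h>

    struct neighbour { int nud_valid; };
    struct dst { struct neighbour *_neighbour; /* never touched directly */ };

    static struct neighbour *dst_neigh(const struct dst *d)
    {
        return d->_neighbour;
    }

    static void dst_set_neigh(struct dst *d, struct neighbour *n)
    {
        d->_neighbour = n;
    }

    int main(void)
    {
        struct neighbour n = { 1 };
        struct dst d = { 0 };

        dst_set_neigh(&d, &n);
        printf("valid=%d\n", dst_neigh(&d) ? dst_neigh(&d)->nud_valid : -1);
        return 0;
    }
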
@@ -2708,6 +2745,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
2708 .default_advmss = ipv4_default_advmss, 2745 .default_advmss = ipv4_default_advmss,
2709 .update_pmtu = ipv4_rt_blackhole_update_pmtu, 2746 .update_pmtu = ipv4_rt_blackhole_update_pmtu,
2710 .cow_metrics = ipv4_rt_blackhole_cow_metrics, 2747 .cow_metrics = ipv4_rt_blackhole_cow_metrics,
2748 .neigh_lookup = ipv4_neigh_lookup,
2711}; 2749};
2712 2750
2713struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) 2751struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
@@ -3303,7 +3341,7 @@ int __init ip_rt_init(void)
3303 xfrm_init(); 3341 xfrm_init();
3304 xfrm4_init(ip_rt_max_size); 3342 xfrm4_init(ip_rt_max_size);
3305#endif 3343#endif
3306 rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL); 3344 rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);
3307 3345
3308#ifdef CONFIG_SYSCTL 3346#ifdef CONFIG_SYSCTL
3309 register_pernet_subsys(&sysctl_route_ops); 3347 register_pernet_subsys(&sysctl_route_ops);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 26461492a84..92bb9434b33 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -316,6 +316,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
316 ireq->wscale_ok = tcp_opt.wscale_ok; 316 ireq->wscale_ok = tcp_opt.wscale_ok;
317 ireq->tstamp_ok = tcp_opt.saw_tstamp; 317 ireq->tstamp_ok = tcp_opt.saw_tstamp;
318 req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; 318 req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
319 treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
319 320
320 /* We throwed the options of the initial SYN away, so we hope 321 /* We throwed the options of the initial SYN away, so we hope
321 * the ACK carries the same options again (see RFC1122 4.2.3.8) 322 * the ACK carries the same options again (see RFC1122 4.2.3.8)
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 57d0752e239..69fd7201129 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -398,20 +398,6 @@ static struct ctl_table ipv4_table[] = {
398 .proc_handler = proc_dointvec_jiffies, 398 .proc_handler = proc_dointvec_jiffies,
399 }, 399 },
400 { 400 {
401 .procname = "inet_peer_gc_mintime",
402 .data = &inet_peer_gc_mintime,
403 .maxlen = sizeof(int),
404 .mode = 0644,
405 .proc_handler = proc_dointvec_jiffies,
406 },
407 {
408 .procname = "inet_peer_gc_maxtime",
409 .data = &inet_peer_gc_maxtime,
410 .maxlen = sizeof(int),
411 .mode = 0644,
412 .proc_handler = proc_dointvec_jiffies,
413 },
414 {
415 .procname = "tcp_orphan_retries", 401 .procname = "tcp_orphan_retries",
416 .data = &sysctl_tcp_orphan_retries, 402 .data = &sysctl_tcp_orphan_retries,
417 .maxlen = sizeof(int), 403 .maxlen = sizeof(int),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 054a59d21eb..46febcacb72 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3220,7 +3220,7 @@ __setup("thash_entries=", set_thash_entries);
3220void __init tcp_init(void) 3220void __init tcp_init(void)
3221{ 3221{
3222 struct sk_buff *skb = NULL; 3222 struct sk_buff *skb = NULL;
3223 unsigned long nr_pages, limit; 3223 unsigned long limit;
3224 int i, max_share, cnt; 3224 int i, max_share, cnt;
3225 unsigned long jiffy = jiffies; 3225 unsigned long jiffy = jiffies;
3226 3226
@@ -3277,13 +3277,7 @@ void __init tcp_init(void)
3277 sysctl_tcp_max_orphans = cnt / 2; 3277 sysctl_tcp_max_orphans = cnt / 2;
3278 sysctl_max_syn_backlog = max(128, cnt / 256); 3278 sysctl_max_syn_backlog = max(128, cnt / 256);
3279 3279
3280 /* Set the pressure threshold to be a fraction of global memory that 3280 limit = nr_free_buffer_pages() / 8;
3281 * is up to 1/2 at 256 MB, decreasing toward zero with the amount of
3282 * memory, with a floor of 128 pages.
3283 */
3284 nr_pages = totalram_pages - totalhigh_pages;
3285 limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
3286 limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
3287 limit = max(limit, 128UL); 3281 limit = max(limit, 128UL);
3288 sysctl_tcp_mem[0] = limit / 4 * 3; 3282 sysctl_tcp_mem[0] = limit / 4 * 3;
3289 sysctl_tcp_mem[1] = limit; 3283 sysctl_tcp_mem[1] = limit;
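
The multi-step pressure-threshold arithmetic is replaced by a flat rule: one eighth of the currently free buffer pages, floored at 128 pages, from which the tcp_mem watermarks shown here are derived as limit/4*3 and limit; udp_init() further down receives the identical treatment. A hedged userspace sketch of the new sizing, using sysconf(_SC_AVPHYS_PAGES) only as a rough stand-in for the kernel's nr_free_buffer_pages():

    /* Sketch: the simplified tcp_mem sizing after this hunk. */
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        unsigned long free_pages = (unsigned long)sysconf(_SC_AVPHYS_PAGES);
        unsigned long limit = free_pages / 8;

        if (limit < 128UL)                 /* floor of 128 pages */
            limit = 128UL;

        printf("tcp_mem[0] (low)      = %lu pages\n", limit / 4 * 3);
        printf("tcp_mem[1] (pressure) = %lu pages\n", limit);
        return 0;
    }
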
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index bef9f04c22b..ea0d2183df4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -880,6 +880,11 @@ static void tcp_init_metrics(struct sock *sk)
880 tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH); 880 tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH);
881 if (tp->snd_ssthresh > tp->snd_cwnd_clamp) 881 if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
882 tp->snd_ssthresh = tp->snd_cwnd_clamp; 882 tp->snd_ssthresh = tp->snd_cwnd_clamp;
883 } else {
884 /* ssthresh may have been reduced unnecessarily during.
885 * 3WHS. Restore it back to its initial default.
886 */
887 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
883 } 888 }
884 if (dst_metric(dst, RTAX_REORDERING) && 889 if (dst_metric(dst, RTAX_REORDERING) &&
885 tp->reordering != dst_metric(dst, RTAX_REORDERING)) { 890 tp->reordering != dst_metric(dst, RTAX_REORDERING)) {
@@ -887,10 +892,7 @@ static void tcp_init_metrics(struct sock *sk)
887 tp->reordering = dst_metric(dst, RTAX_REORDERING); 892 tp->reordering = dst_metric(dst, RTAX_REORDERING);
888 } 893 }
889 894
890 if (dst_metric(dst, RTAX_RTT) == 0) 895 if (dst_metric(dst, RTAX_RTT) == 0 || tp->srtt == 0)
891 goto reset;
892
893 if (!tp->srtt && dst_metric_rtt(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3))
894 goto reset; 896 goto reset;
895 897
896 /* Initial rtt is determined from SYN,SYN-ACK. 898 /* Initial rtt is determined from SYN,SYN-ACK.
@@ -916,19 +918,26 @@ static void tcp_init_metrics(struct sock *sk)
916 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); 918 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
917 } 919 }
918 tcp_set_rto(sk); 920 tcp_set_rto(sk);
919 if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) {
920reset: 921reset:
921 /* Play conservative. If timestamps are not 922 if (tp->srtt == 0) {
922 * supported, TCP will fail to recalculate correct 923 /* RFC2988bis: We've failed to get a valid RTT sample from
923 * rtt, if initial rto is too small. FORGET ALL AND RESET! 924 * 3WHS. This is most likely due to retransmission,
925 * including spurious one. Reset the RTO back to 3secs
926 * from the more aggressive 1sec to avoid more spurious
927 * retransmission.
924 */ 928 */
925 if (!tp->rx_opt.saw_tstamp && tp->srtt) { 929 tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK;
926 tp->srtt = 0; 930 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK;
927 tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
928 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
929 }
930 } 931 }
931 tp->snd_cwnd = tcp_init_cwnd(tp, dst); 932 /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been
933 * retransmitted. In light of RFC2988bis' more aggressive 1sec
934 * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK
935 * retransmission has occurred.
936 */
937 if (tp->total_retrans > 1)
938 tp->snd_cwnd = 1;
939 else
940 tp->snd_cwnd = tcp_init_cwnd(tp, dst);
932 tp->snd_cwnd_stamp = tcp_time_stamp; 941 tp->snd_cwnd_stamp = tcp_time_stamp;
933} 942}
934 943
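
The reworked tail of tcp_init_metrics() encodes two fallbacks: when the three-way handshake yielded no usable RTT sample, the RTO is pushed back from the aggressive 1 s initial value to the 3 s TCP_TIMEOUT_FALLBACK, and when more than one SYN/SYN-ACK retransmission occurred the initial cwnd is cut to 1 per RFC 5681 instead of tcp_init_cwnd(). A compact restatement of that policy (seconds instead of jiffies, stand-in struct; a sketch, not the kernel function):

    /* Sketch of the fallback policy at the end of tcp_init_metrics().
     * The kernel works in jiffies with TCP_TIMEOUT_INIT (1s) and
     * TCP_TIMEOUT_FALLBACK (3s); plain seconds are used here. */
    #include <stdio.h>

    struct conn { double srtt; int total_retrans; };

    static void init_after_3whs(const struct conn *c, double *rto, int *cwnd,
                                int default_cwnd)
    {
        *rto = 1.0;                 /* aggressive 1s initial RTO */
        if (c->srtt == 0.0)
            *rto = 3.0;             /* no RTT sample from the 3WHS: back off */

        if (c->total_retrans > 1)
            *cwnd = 1;              /* SYN/SYN-ACK was retransmitted */
        else
            *cwnd = default_cwnd;
    }

    int main(void)
    {
        struct conn lossy = { 0.0, 2 };
        double rto; int cwnd;

        init_after_3whs(&lossy, &rto, &cwnd, 10);
        printf("rto=%.0fs cwnd=%d\n", rto, cwnd);
        return 0;
    }
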
@@ -3112,12 +3121,13 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
3112 tcp_xmit_retransmit_queue(sk); 3121 tcp_xmit_retransmit_queue(sk);
3113} 3122}
3114 3123
3115static void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt) 3124void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)
3116{ 3125{
3117 tcp_rtt_estimator(sk, seq_rtt); 3126 tcp_rtt_estimator(sk, seq_rtt);
3118 tcp_set_rto(sk); 3127 tcp_set_rto(sk);
3119 inet_csk(sk)->icsk_backoff = 0; 3128 inet_csk(sk)->icsk_backoff = 0;
3120} 3129}
3130EXPORT_SYMBOL(tcp_valid_rtt_meas);
3121 3131
3122/* Read draft-ietf-tcplw-high-performance before mucking 3132/* Read draft-ietf-tcplw-high-performance before mucking
3123 * with this code. (Supersedes RFC1323) 3133 * with this code. (Supersedes RFC1323)
@@ -5806,12 +5816,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5806 tp->rx_opt.snd_wscale; 5816 tp->rx_opt.snd_wscale;
5807 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); 5817 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
5808 5818
5809 /* tcp_ack considers this ACK as duplicate
5810 * and does not calculate rtt.
5811 * Force it here.
5812 */
5813 tcp_ack_update_rtt(sk, 0, 0);
5814
5815 if (tp->rx_opt.tstamp_ok) 5819 if (tp->rx_opt.tstamp_ok)
5816 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; 5820 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
5817 5821
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 708dc203b03..955b8e65b69 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -429,8 +429,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
429 break; 429 break;
430 430
431 icsk->icsk_backoff--; 431 icsk->icsk_backoff--;
432 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) << 432 inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
433 icsk->icsk_backoff; 433 TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
434 tcp_bound_rto(sk); 434 tcp_bound_rto(sk);
435 435
436 skb = tcp_write_queue_head(sk); 436 skb = tcp_write_queue_head(sk);
@@ -1384,6 +1384,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1384 isn = tcp_v4_init_sequence(skb); 1384 isn = tcp_v4_init_sequence(skb);
1385 } 1385 }
1386 tcp_rsk(req)->snt_isn = isn; 1386 tcp_rsk(req)->snt_isn = isn;
1387 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1387 1388
1388 if (tcp_v4_send_synack(sk, dst, req, 1389 if (tcp_v4_send_synack(sk, dst, req,
1389 (struct request_values *)&tmp_ext) || 1390 (struct request_values *)&tmp_ext) ||
@@ -1458,6 +1459,10 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1458 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; 1459 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1459 1460
1460 tcp_initialize_rcv_mss(newsk); 1461 tcp_initialize_rcv_mss(newsk);
1462 if (tcp_rsk(req)->snt_synack)
1463 tcp_valid_rtt_meas(newsk,
1464 tcp_time_stamp - tcp_rsk(req)->snt_synack);
1465 newtp->total_retrans = req->retrans;
1461 1466
1462#ifdef CONFIG_TCP_MD5SIG 1467#ifdef CONFIG_TCP_MD5SIG
1463 /* Copy over the MD5 key from the original socket */ 1468 /* Copy over the MD5 key from the original socket */
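
Together with the snt_synack assignments added in tcp_v4_conn_request(), syncookies.c and tcp_minisocks.c, the passive-open side can now seed its RTT estimator from the SYN-ACK round trip: tcp_valid_rtt_meas() (exported in the tcp_input.c hunk above) is fed tcp_time_stamp - snt_synack when the child socket is built, and a zeroed snt_synack suppresses samples tainted by retransmission. A hedged sketch of that measurement, with a millisecond clock standing in for tcp_time_stamp:

    /* Sketch: taking the passive-open RTT sample from the SYN-ACK round
     * trip. clock_ms() stands in for the kernel's jiffies-based clock. */
    #include <stdio.h>
    #include <stdint.h>
    #include <time.h>

    static uint32_t clock_ms(void)
    {
        struct timespec ts;
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint32_t)(ts.tv_sec * 1000 + ts.tv_nsec / 1000000);
    }

    struct req { uint32_t snt_synack; };      /* 0 means "no valid sample" */

    int main(void)
    {
        struct req r = { clock_ms() };            /* SYN-ACK transmitted now   */
        struct timespec d = { 0, 40 * 1000000 };  /* pretend the ACK took 40ms */

        nanosleep(&d, NULL);
        if (r.snt_synack)                         /* skip retransmit-tainted samples */
            printf("seed RTT estimator with %u ms\n", clock_ms() - r.snt_synack);
        return 0;
    }
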
@@ -1855,7 +1860,7 @@ static int tcp_v4_init_sock(struct sock *sk)
1855 * algorithms that we must have the following bandaid to talk 1860 * algorithms that we must have the following bandaid to talk
1856 * efficiently to them. -DaveM 1861 * efficiently to them. -DaveM
1857 */ 1862 */
1858 tp->snd_cwnd = 2; 1863 tp->snd_cwnd = TCP_INIT_CWND;
1859 1864
1860 /* See draft-stevens-tcpca-spec-01 for discussion of the 1865 /* See draft-stevens-tcpca-spec-01 for discussion of the
1861 * initialization of these values. 1866 * initialization of these values.
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 80b1f80759a..d2fe4e06b47 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -486,7 +486,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
486 * algorithms that we must have the following bandaid to talk 486 * algorithms that we must have the following bandaid to talk
487 * efficiently to them. -DaveM 487 * efficiently to them. -DaveM
488 */ 488 */
489 newtp->snd_cwnd = 2; 489 newtp->snd_cwnd = TCP_INIT_CWND;
490 newtp->snd_cwnd_cnt = 0; 490 newtp->snd_cwnd_cnt = 0;
491 newtp->bytes_acked = 0; 491 newtp->bytes_acked = 0;
492 492
@@ -720,6 +720,10 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
720 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); 720 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
721 return NULL; 721 return NULL;
722 } 722 }
723 if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr)
724 tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr;
725 else if (req->retrans) /* don't take RTT sample if retrans && ~TS */
726 tcp_rsk(req)->snt_synack = 0;
723 727
724 /* OK, ACK is valid, create big socket and 728 /* OK, ACK is valid, create big socket and
725 * feed this segment to it. It will repeat all 729 * feed this segment to it. It will repeat all
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index abca870d8ff..1b5a19340a9 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -105,6 +105,7 @@
105#include <net/route.h> 105#include <net/route.h>
106#include <net/checksum.h> 106#include <net/checksum.h>
107#include <net/xfrm.h> 107#include <net/xfrm.h>
108#include <trace/events/udp.h>
108#include "udp_impl.h" 109#include "udp_impl.h"
109 110
110struct udp_table udp_table __read_mostly; 111struct udp_table udp_table __read_mostly;
@@ -1249,6 +1250,9 @@ csum_copy_err:
1249 1250
1250 if (noblock) 1251 if (noblock)
1251 return -EAGAIN; 1252 return -EAGAIN;
1253
1254 /* starting over for a new packet */
1255 msg->msg_flags &= ~MSG_TRUNC;
1252 goto try_again; 1256 goto try_again;
1253} 1257}
1254 1258
@@ -1363,6 +1367,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1363 is_udplite); 1367 is_udplite);
1364 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); 1368 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
1365 kfree_skb(skb); 1369 kfree_skb(skb);
1370 trace_udp_fail_queue_rcv_skb(rc, sk);
1366 return -1; 1371 return -1;
1367 } 1372 }
1368 1373
@@ -2206,16 +2211,10 @@ void __init udp_table_init(struct udp_table *table, const char *name)
2206 2211
2207void __init udp_init(void) 2212void __init udp_init(void)
2208{ 2213{
2209 unsigned long nr_pages, limit; 2214 unsigned long limit;
2210 2215
2211 udp_table_init(&udp_table, "UDP"); 2216 udp_table_init(&udp_table, "UDP");
2212 /* Set the pressure threshold up by the same strategy of TCP. It is a 2217 limit = nr_free_buffer_pages() / 8;
2213 * fraction of global memory that is up to 1/2 at 256 MB, decreasing
2214 * toward zero with the amount of memory, with a floor of 128 pages.
2215 */
2216 nr_pages = totalram_pages - totalhigh_pages;
2217 limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
2218 limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
2219 limit = max(limit, 128UL); 2218 limit = max(limit, 128UL);
2220 sysctl_udp_mem[0] = limit / 4 * 3; 2219 sysctl_udp_mem[0] = limit / 4 * 3;
2221 sysctl_udp_mem[1] = limit; 2220 sysctl_udp_mem[1] = limit;
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 2d51840e53a..327a617d594 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -32,7 +32,12 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
32 dst = skb_dst(skb); 32 dst = skb_dst(skb);
33 mtu = dst_mtu(dst); 33 mtu = dst_mtu(dst);
34 if (skb->len > mtu) { 34 if (skb->len > mtu) {
35 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 35 if (skb->sk)
36 ip_local_error(skb->sk, EMSGSIZE, ip_hdr(skb)->daddr,
37 inet_sk(skb->sk)->inet_dport, mtu);
38 else
39 icmp_send(skb, ICMP_DEST_UNREACH,
40 ICMP_FRAG_NEEDED, htonl(mtu));
36 ret = -EMSGSIZE; 41 ret = -EMSGSIZE;
37 } 42 }
38out: 43out:
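
When the oversized packet belongs to a local socket, xfrm4_tunnel_check_size() now reports the failure straight to that socket with ip_local_error() rather than sending an ICMP FRAG_NEEDED back at itself, so the sender observes an ordinary EMSGSIZE. A userspace sketch of how that error surfaces under path MTU discovery (the destination is a TEST-NET placeholder; a default route and a sub-64K path MTU are assumed):

    /* Sketch: observing the locally reported EMSGSIZE. With IP_PMTUDISC_DO
     * the datagram must fit the route MTU, so an oversized send fails
     * locally and IP_MTU shows the discovered path MTU. 192.0.2.1 is a
     * documentation address; nothing needs to answer. */
    #include <stdio.h>
    #include <errno.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <netinet/in.h>      /* IP_MTU_DISCOVER, IP_PMTUDISC_DO, IP_MTU */
    #include <arpa/inet.h>

    int main(void)
    {
        int fd = socket(AF_INET, SOCK_DGRAM, 0);
        int pmtud = IP_PMTUDISC_DO;            /* set DF, no local fragmentation */
        struct sockaddr_in dst = { .sin_family = AF_INET,
                                   .sin_port = htons(9) };
        static char big[65000];                /* larger than any Ethernet MTU */
        int mtu = 0;
        socklen_t len = sizeof(mtu);

        inet_pton(AF_INET, "192.0.2.1", &dst.sin_addr);
        setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER, &pmtud, sizeof(pmtud));
        if (connect(fd, (struct sockaddr *)&dst, sizeof(dst)) < 0)
            return 1;                          /* no route in this environment */
        if (send(fd, big, sizeof(big), 0) < 0 && errno == EMSGSIZE) {
            getsockopt(fd, IPPROTO_IP, IP_MTU, &mtu, &len);
            printf("EMSGSIZE, path MTU is %d\n", mtu);
        }
        close(fd);
        return 0;
    }
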
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 981e43eaf70..fc5368ad2b0 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -117,7 +117,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
117 memset(fl4, 0, sizeof(struct flowi4)); 117 memset(fl4, 0, sizeof(struct flowi4));
118 fl4->flowi4_mark = skb->mark; 118 fl4->flowi4_mark = skb->mark;
119 119
120 if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { 120 if (!ip_is_fragment(iph)) {
121 switch (iph->protocol) { 121 switch (iph->protocol) {
122 case IPPROTO_UDP: 122 case IPPROTO_UDP:
123 case IPPROTO_UDPLITE: 123 case IPPROTO_UDPLITE: