Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/Makefile                            |    1
-rw-r--r--  net/ipv4/af_inet.c                           |   77
-rw-r--r--  net/ipv4/ah4.c                               |    8
-rw-r--r--  net/ipv4/arp.c                               |   41
-rw-r--r--  net/ipv4/cipso_ipv4.c                        |    2
-rw-r--r--  net/ipv4/devinet.c                           |   35
-rw-r--r--  net/ipv4/fib_frontend.c                      |    6
-rw-r--r--  net/ipv4/fib_semantics.c                     |   10
-rw-r--r--  net/ipv4/fib_trie.c                          |   12
-rw-r--r--  net/ipv4/gre.c                               |   22
-rw-r--r--  net/ipv4/icmp.c                              |   14
-rw-r--r--  net/ipv4/igmp.c                              |    6
-rw-r--r--  net/ipv4/inet_diag.c                         |    2
-rw-r--r--  net/ipv4/inet_hashtables.c                   |    1
-rw-r--r--  net/ipv4/inet_lro.c                          |   74
-rw-r--r--  net/ipv4/inetpeer.c                          |  294
-rw-r--r--  net/ipv4/ip_fragment.c                       |    5
-rw-r--r--  net/ipv4/ip_gre.c                            |    2
-rw-r--r--  net/ipv4/ip_input.c                          |    4
-rw-r--r--  net/ipv4/ip_options.c                        |    1
-rw-r--r--  net/ipv4/ip_output.c                         |   23
-rw-r--r--  net/ipv4/ip_sockglue.c                       |    9
-rw-r--r--  net/ipv4/ipconfig.c                          |   79
-rw-r--r--  net/ipv4/ipip.c                              |    7
-rw-r--r--  net/ipv4/ipmr.c                              |    5
-rw-r--r--  net/ipv4/netfilter.c                         |   18
-rw-r--r--  net/ipv4/netfilter/Kconfig                   |   12
-rw-r--r--  net/ipv4/netfilter/ip_queue.c                |   12
-rw-r--r--  net/ipv4/netfilter/ipt_CLUSTERIP.c           |   26
-rw-r--r--  net/ipv4/netfilter/ipt_REJECT.c              |    8
-rw-r--r--  net/ipv4/netfilter/nf_defrag_ipv4.c          |    2
-rw-r--r--  net/ipv4/netfilter/nf_nat_proto_common.c     |    1
-rw-r--r--  net/ipv4/netfilter/nf_nat_snmp_basic.c       |  210
-rw-r--r--  net/ipv4/netfilter/nf_nat_standalone.c       |    2
-rw-r--r--  net/ipv4/proc.c                              |    2
-rw-r--r--  net/ipv4/raw.c                               |   41
-rw-r--r--  net/ipv4/route.c                             |  347
-rw-r--r--  net/ipv4/syncookies.c                        |    3
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c                   |   14
-rw-r--r--  net/ipv4/sysfs_net_ipv4.c                    |   88
-rw-r--r--  net/ipv4/tcp.c                               |  121
-rw-r--r--  net/ipv4/tcp_input.c                         |   52
-rw-r--r--  net/ipv4/tcp_ipv4.c                          |   74
-rw-r--r--  net/ipv4/tcp_minisocks.c                     |    7
-rw-r--r--  net/ipv4/tcp_output.c                        |    6
-rw-r--r--  net/ipv4/udp.c                               |    2
-rw-r--r--  net/ipv4/xfrm4_policy.c                      |   16
47 files changed, 1037 insertions, 767 deletions
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index f2dc69cffb5..681084d76a9 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -14,6 +14,7 @@ obj-y := route.o inetpeer.o protocol.o \
 	  inet_fragment.o ping.o
 
 obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
+obj-$(CONFIG_SYSFS) += sysfs_net_ipv4.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
 obj-$(CONFIG_IP_MROUTE) += ipmr.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index ef1528af7ab..bf488051a8d 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -118,6 +118,19 @@
 #include <linux/mroute.h>
 #endif
 
+#ifdef CONFIG_ANDROID_PARANOID_NETWORK
+#include <linux/android_aid.h>
+
+static inline int current_has_network(void)
+{
+	return in_egroup_p(AID_INET) || capable(CAP_NET_RAW);
+}
+#else
+static inline int current_has_network(void)
+{
+	return 1;
+}
+#endif
 
 /* The inetsw table contains everything that inet_create needs to
  * build a new socket.
@@ -258,6 +271,7 @@ static inline int inet_netns_ok(struct net *net, int protocol)
 		return ipprot->netns_ok;
 }
 
+
 /*
  *	Create an inet socket.
  */
@@ -274,6 +288,9 @@ static int inet_create(struct net *net, struct socket *sock, int protocol,
 	int try_loading_module = 0;
 	int err;
 
+	if (!current_has_network())
+		return -EACCES;
+
 	if (unlikely(!inet_ehash_secret))
 		if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
 			build_ehash_secret();
@@ -466,8 +483,13 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		goto out;
 
 	if (addr->sin_family != AF_INET) {
+		/* Compatibility games : accept AF_UNSPEC (mapped to AF_INET)
+		 * only if s_addr is INADDR_ANY.
+		 */
 		err = -EAFNOSUPPORT;
-		goto out;
+		if (addr->sin_family != AF_UNSPEC ||
+		    addr->sin_addr.s_addr != htonl(INADDR_ANY))
+			goto out;
 	}
 
 	chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
@@ -874,6 +896,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	case SIOCSIFPFLAGS:
 	case SIOCGIFPFLAGS:
 	case SIOCSIFFLAGS:
+	case SIOCKILLADDR:
 		err = devinet_ioctl(net, cmd, (void __user *)arg);
 		break;
 	default:
@@ -1440,11 +1463,11 @@ EXPORT_SYMBOL_GPL(inet_ctl_sock_create);
 unsigned long snmp_fold_field(void __percpu *mib[], int offt)
 {
 	unsigned long res = 0;
-	int i;
+	int i, j;
 
 	for_each_possible_cpu(i) {
-		res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt);
-		res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt);
+		for (j = 0; j < SNMP_ARRAY_SZ; j++)
+			res += *(((unsigned long *) per_cpu_ptr(mib[j], i)) + offt);
 	}
 	return res;
 }
@@ -1458,28 +1481,19 @@ u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset)
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
-		void *bhptr, *userptr;
+		void *bhptr;
 		struct u64_stats_sync *syncp;
-		u64 v_bh, v_user;
+		u64 v;
 		unsigned int start;
 
-		/* first mib used by softirq context, we must use _bh() accessors */
-		bhptr = per_cpu_ptr(SNMP_STAT_BHPTR(mib), cpu);
+		bhptr = per_cpu_ptr(mib[0], cpu);
 		syncp = (struct u64_stats_sync *)(bhptr + syncp_offset);
 		do {
 			start = u64_stats_fetch_begin_bh(syncp);
-			v_bh = *(((u64 *) bhptr) + offt);
+			v = *(((u64 *) bhptr) + offt);
 		} while (u64_stats_fetch_retry_bh(syncp, start));
 
-		/* second mib used in USER context */
-		userptr = per_cpu_ptr(SNMP_STAT_USRPTR(mib), cpu);
-		syncp = (struct u64_stats_sync *)(userptr + syncp_offset);
-		do {
-			start = u64_stats_fetch_begin(syncp);
-			v_user = *(((u64 *) userptr) + offt);
-		} while (u64_stats_fetch_retry(syncp, start));
-
-		res += v_bh + v_user;
+		res += v;
 	}
 	return res;
 }
@@ -1491,25 +1505,28 @@ int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align)
 	BUG_ON(ptr == NULL);
 	ptr[0] = __alloc_percpu(mibsize, align);
 	if (!ptr[0])
-		goto err0;
+		return -ENOMEM;
+#if SNMP_ARRAY_SZ == 2
 	ptr[1] = __alloc_percpu(mibsize, align);
-	if (!ptr[1])
-		goto err1;
+	if (!ptr[1]) {
+		free_percpu(ptr[0]);
+		ptr[0] = NULL;
+		return -ENOMEM;
+	}
+#endif
 	return 0;
-err1:
-	free_percpu(ptr[0]);
-	ptr[0] = NULL;
-err0:
-	return -ENOMEM;
 }
 EXPORT_SYMBOL_GPL(snmp_mib_init);
 
-void snmp_mib_free(void __percpu *ptr[2])
+void snmp_mib_free(void __percpu *ptr[SNMP_ARRAY_SZ])
 {
+	int i;
+
 	BUG_ON(ptr == NULL);
-	free_percpu(ptr[0]);
-	free_percpu(ptr[1]);
-	ptr[0] = ptr[1] = NULL;
+	for (i = 0; i < SNMP_ARRAY_SZ; i++) {
+		free_percpu(ptr[i]);
+		ptr[i] = NULL;
+	}
 }
 EXPORT_SYMBOL_GPL(snmp_mib_free);
 
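Note (annotation, not part of the patch): with CONFIG_ANDROID_PARANOID_NETWORK enabled, inet_create() now fails early for callers that are neither in the AID_INET group nor CAP_NET_RAW-capable. A minimal user-space probe of the visible effect (a sketch; the EACCES outcome assumes the Android gate is active for the calling process):

	#include <stdio.h>
	#include <errno.h>
	#include <string.h>
	#include <sys/socket.h>

	int main(void)
	{
		/* Any AF_INET socket type goes through inet_create(). */
		int fd = socket(AF_INET, SOCK_DGRAM, 0);

		if (fd < 0)
			printf("socket: %s\n", strerror(errno)); /* expect EACCES */
		else
			printf("socket created, fd=%d\n", fd);
		return 0;
	}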
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index c1f4154552f..36d14406261 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -136,8 +136,6 @@ static void ah_output_done(struct crypto_async_request *base, int err)
 		memcpy(top_iph+1, iph+1, top_iph->ihl*4 - sizeof(struct iphdr));
 	}
 
-	err = ah->nexthdr;
-
 	kfree(AH_SKB_CB(skb)->tmp);
 	xfrm_output_resume(skb, err);
 }
@@ -264,12 +262,12 @@ static void ah_input_done(struct crypto_async_request *base, int err)
 	if (err)
 		goto out;
 
+	err = ah->nexthdr;
+
 	skb->network_header += ah_hlen;
 	memcpy(skb_network_header(skb), work_iph, ihl);
 	__skb_pull(skb, ah_hlen + ihl);
 	skb_set_transport_header(skb, -ihl);
-
-	err = ah->nexthdr;
 out:
 	kfree(AH_SKB_CB(skb)->tmp);
 	xfrm_input_resume(skb, err);
@@ -371,8 +369,6 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 		if (err == -EINPROGRESS)
 			goto out;
 
-		if (err == -EBUSY)
-			err = NET_XMIT_DROP;
 		goto out_free;
 	}
 
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 1b74d3b6437..96a164aa136 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -97,7 +97,6 @@
 #include <linux/init.h>
 #include <linux/net.h>
 #include <linux/rcupdate.h>
-#include <linux/jhash.h>
 #include <linux/slab.h>
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
@@ -139,8 +138,6 @@ static const struct neigh_ops arp_generic_ops = {
 	.error_report =		arp_error_report,
 	.output =		neigh_resolve_output,
 	.connected_output =	neigh_connected_output,
-	.hh_output =		dev_queue_xmit,
-	.queue_xmit =		dev_queue_xmit,
 };
 
 static const struct neigh_ops arp_hh_ops = {
@@ -149,16 +146,12 @@ static const struct neigh_ops arp_hh_ops = {
 	.error_report =		arp_error_report,
 	.output =		neigh_resolve_output,
 	.connected_output =	neigh_resolve_output,
-	.hh_output =		dev_queue_xmit,
-	.queue_xmit =		dev_queue_xmit,
 };
 
 static const struct neigh_ops arp_direct_ops = {
 	.family =		AF_INET,
-	.output =		dev_queue_xmit,
-	.connected_output =	dev_queue_xmit,
-	.hh_output =		dev_queue_xmit,
-	.queue_xmit =		dev_queue_xmit,
+	.output =		neigh_direct_output,
+	.connected_output =	neigh_direct_output,
 };
 
 static const struct neigh_ops arp_broken_ops = {
@@ -167,8 +160,6 @@ static const struct neigh_ops arp_broken_ops = {
 	.error_report =		arp_error_report,
 	.output =		neigh_compat_output,
 	.connected_output =	neigh_compat_output,
-	.hh_output =		dev_queue_xmit,
-	.queue_xmit =		dev_queue_xmit,
 };
 
 struct neigh_table arp_tbl = {
@@ -232,7 +223,7 @@ static u32 arp_hash(const void *pkey,
 		    const struct net_device *dev,
 		    __u32 hash_rnd)
 {
-	return jhash_2words(*(u32 *)pkey, dev->ifindex, hash_rnd);
+	return arp_hashfn(*(u32 *)pkey, dev, hash_rnd);
 }
 
 static int arp_constructor(struct neighbour *neigh)
@@ -259,7 +250,7 @@ static int arp_constructor(struct neighbour *neigh)
 	if (!dev->header_ops) {
 		neigh->nud_state = NUD_NOARP;
 		neigh->ops = &arp_direct_ops;
-		neigh->output = neigh->ops->queue_xmit;
+		neigh->output = neigh_direct_output;
 	} else {
 		/* Good devices (checked by reading texts, but only Ethernet is
 		   tested)
@@ -518,30 +509,6 @@ EXPORT_SYMBOL(arp_find);
 
 /* END OF OBSOLETE FUNCTIONS */
 
-int arp_bind_neighbour(struct dst_entry *dst)
-{
-	struct net_device *dev = dst->dev;
-	struct neighbour *n = dst->neighbour;
-
-	if (dev == NULL)
-		return -EINVAL;
-	if (n == NULL) {
-		__be32 nexthop = ((struct rtable *)dst)->rt_gateway;
-		if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
-			nexthop = 0;
-		n = __neigh_lookup_errno(
-#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
-					 dev->type == ARPHRD_ATM ?
-					 clip_tbl_hook :
-#endif
-					 &arp_tbl, &nexthop, dev);
-		if (IS_ERR(n))
-			return PTR_ERR(n);
-		dst->neighbour = n;
-	}
-	return 0;
-}
-
 /*
  *	Check if we can use proxy ARP for this path
  */
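Note (annotation, not part of the patch): the dropped .hh_output/.queue_xmit fields follow the neighbour-layer rework in this series; NOARP devices now reach the device transmit path through neigh_direct_output(). In net/core/neighbour.c that helper is roughly the following thin wrapper (a sketch from the same series; exact signature assumed):

	int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
	{
		/* no address resolution needed; hand straight to the qdisc */
		return dev_queue_xmit(skb);
	}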
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 2b3c23c287c..2c2a98e402e 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -50,7 +50,7 @@
 #include <net/tcp.h>
 #include <net/netlabel.h>
 #include <net/cipso_ipv4.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
 #include <asm/bug.h>
 #include <asm/unaligned.h>
 
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 0d4a184af16..76db59202f1 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -59,6 +59,7 @@
 
 #include <net/arp.h>
 #include <net/ip.h>
+#include <net/tcp.h>
 #include <net/route.h>
 #include <net/ip_fib.h>
 #include <net/rtnetlink.h>
@@ -735,6 +736,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
 	case SIOCSIFDSTADDR:	/* Set the destination address */
 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
+	case SIOCKILLADDR:	/* Nuke all sockets on this address */
 		ret = -EACCES;
 		if (!capable(CAP_NET_ADMIN))
 			goto out;
@@ -786,7 +788,8 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 	}
 
 	ret = -EADDRNOTAVAIL;
-	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
+	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS
+	    && cmd != SIOCKILLADDR)
 		goto done;
 
 	switch (cmd) {
@@ -912,6 +915,9 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 			inet_insert_ifa(ifa);
 		}
 		break;
+	case SIOCKILLADDR:	/* Nuke all connections on this address */
+		ret = tcp_nuke_addr(net, (struct sockaddr *) sin);
+		break;
 	}
 done:
 	rtnl_unlock();
@@ -1134,15 +1140,15 @@ static void inetdev_send_gratuitous_arp(struct net_device *dev,
 					  struct in_device *in_dev)
 
 {
-	struct in_ifaddr *ifa = in_dev->ifa_list;
-
-	if (!ifa)
-		return;
+	struct in_ifaddr *ifa;
 
-	arp_send(ARPOP_REQUEST, ETH_P_ARP,
-		 ifa->ifa_local, dev,
-		 ifa->ifa_local, NULL,
-		 dev->dev_addr, NULL);
+	for (ifa = in_dev->ifa_list; ifa;
+	     ifa = ifa->ifa_next) {
+		arp_send(ARPOP_REQUEST, ETH_P_ARP,
+			 ifa->ifa_local, dev,
+			 ifa->ifa_local, NULL,
+			 dev->dev_addr, NULL);
+	}
 }
 
 /* Called only under RTNL semaphore */
@@ -1490,7 +1496,9 @@ static int devinet_conf_proc(ctl_table *ctl, int write,
 			    void __user *buffer,
 			    size_t *lenp, loff_t *ppos)
 {
+	int old_value = *(int *)ctl->data;
 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+	int new_value = *(int *)ctl->data;
 
 	if (write) {
 		struct ipv4_devconf *cnf = ctl->extra1;
@@ -1501,6 +1509,9 @@ static int devinet_conf_proc(ctl_table *ctl, int write,
 
 		if (cnf == net->ipv4.devconf_dflt)
 			devinet_copy_dflt_conf(net, i);
+		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1)
+			if ((new_value == 0) && (old_value != 0))
+				rt_cache_flush(net, 0);
 	}
 
 	return ret;
@@ -1833,8 +1844,8 @@ void __init devinet_init(void)
 
 	rtnl_af_register(&inet_af_ops);
 
-	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
-	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
-	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
+	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
+	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
+	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
 }
 
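Note (annotation, not part of the patch): SIOCKILLADDR is an Android-specific ioctl, not in mainline; it resets every TCP connection bound to the given address via tcp_nuke_addr(). A hypothetical user-space caller looks like this (ifname/addr handling is illustrative; the SIOCKILLADDR constant comes from the patched <linux/sockios.h>):

	#include <string.h>
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <net/if.h>
	#include <arpa/inet.h>

	/* Reset all TCP connections on ifname's address addr; needs CAP_NET_ADMIN. */
	static int kill_addr(int fd, const char *ifname, const char *addr)
	{
		struct ifreq ifr;
		struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;

		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
		sin->sin_family = AF_INET;
		inet_pton(AF_INET, addr, &sin->sin_addr);
		return ioctl(fd, SIOCKILLADDR, &ifr);
	}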
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 22524716fe7..92fc5f69f5d 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1124,9 +1124,9 @@ static struct pernet_operations fib_net_ops = {
 
 void __init ip_fib_init(void)
 {
-	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
-	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
-	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
+	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
+	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
+	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
 
 	register_pernet_subsys(&fib_net_ops);
 	register_netdevice_notifier(&fib_netdev_notifier);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 33e2c35b74b..80106d89d54 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -142,6 +142,14 @@ const struct fib_prop fib_props[RTN_MAX + 1] = {
 };
 
 /* Release a nexthop info record */
+static void free_fib_info_rcu(struct rcu_head *head)
+{
+	struct fib_info *fi = container_of(head, struct fib_info, rcu);
+
+	if (fi->fib_metrics != (u32 *) dst_default_metrics)
+		kfree(fi->fib_metrics);
+	kfree(fi);
+}
 
 void free_fib_info(struct fib_info *fi)
 {
@@ -156,7 +164,7 @@ void free_fib_info(struct fib_info *fi)
 	} endfor_nexthops(fi);
 	fib_info_cnt--;
 	release_net(fi->fib_net);
-	kfree_rcu(fi, rcu);
+	call_rcu(&fi->rcu, free_fib_info_rcu);
 }
 
 void fib_release_info(struct fib_info *fi)
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 58c25ea5a5c..de9e2978476 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -110,9 +110,10 @@ struct leaf {
 
 struct leaf_info {
 	struct hlist_node hlist;
-	struct rcu_head rcu;
 	int plen;
+	u32 mask_plen; /* ntohl(inet_make_mask(plen)) */
 	struct list_head falh;
+	struct rcu_head rcu;
 };
 
 struct tnode {
@@ -451,6 +452,7 @@ static struct leaf_info *leaf_info_new(int plen)
 	struct leaf_info *li = kmalloc(sizeof(struct leaf_info),  GFP_KERNEL);
 	if (li) {
 		li->plen = plen;
+		li->mask_plen = ntohl(inet_make_mask(plen));
 		INIT_LIST_HEAD(&li->falh);
 	}
 	return li;
@@ -1359,10 +1361,8 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l,
 
 		hlist_for_each_entry_rcu(li, node, hhead, hlist) {
 			struct fib_alias *fa;
-			int plen = li->plen;
-			__be32 mask = inet_make_mask(plen);
 
-			if (l->key != (key & ntohl(mask)))
+			if (l->key != (key & li->mask_plen))
 				continue;
 
 			list_for_each_entry_rcu(fa, &li->falh, fa_list) {
@@ -1394,7 +1394,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l,
 #ifdef CONFIG_IP_FIB_TRIE_STATS
 				t->stats.semantic_match_passed++;
 #endif
-				res->prefixlen = plen;
+				res->prefixlen = li->plen;
 				res->nh_sel = nhsel;
 				res->type = fa->fa_type;
 				res->scope = fa->fa_info->fib_scope;
@@ -1402,7 +1402,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l,
 				res->table = tb;
 				res->fa_head = &li->falh;
 				if (!(fib_flags & FIB_LOOKUP_NOREF))
-					atomic_inc(&res->fi->fib_clntref);
+					atomic_inc(&fi->fib_clntref);
 				return 0;
 			}
 		}
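Note (annotation, not part of the patch): caching mask_plen at leaf_info_new() time trades four bytes per leaf_info for dropping a mask construction plus a byte swap from every check_leaf() pass. A minimal user-space sketch of the before/after comparison (assuming the usual inet_make_mask() definition from include/net/inet_sock.h):

	#include <arpa/inet.h>

	static unsigned int inet_make_mask(int logmask)
	{
		return logmask ? htonl(~((1U << (32 - logmask)) - 1)) : 0;
	}

	/* l_key and key are host-order trie keys, as in check_leaf() */
	static int leaf_matches(unsigned int l_key, unsigned int key,
				unsigned int mask_plen /* = ntohl(inet_make_mask(plen)) */)
	{
		/* before: l_key == (key & ntohl(inet_make_mask(plen))) per lookup */
		return l_key == (key & mask_plen);
	}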
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index c6933f2ea31..dbfc21de347 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -15,8 +15,8 @@
 #include <linux/kmod.h>
 #include <linux/skbuff.h>
 #include <linux/in.h>
+#include <linux/ip.h>
 #include <linux/netdevice.h>
-#include <linux/version.h>
 #include <linux/spinlock.h>
 #include <net/protocol.h>
 #include <net/gre.h>
@@ -97,27 +97,17 @@ drop:
 static void gre_err(struct sk_buff *skb, u32 info)
 {
 	const struct gre_protocol *proto;
-	u8 ver;
-
-	if (!pskb_may_pull(skb, 12))
-		goto drop;
+	const struct iphdr *iph = (const struct iphdr *)skb->data;
+	u8 ver = skb->data[(iph->ihl<<2) + 1]&0x7f;
 
-	ver = skb->data[1]&0x7f;
 	if (ver >= GREPROTO_MAX)
-		goto drop;
+		return;
 
 	rcu_read_lock();
 	proto = rcu_dereference(gre_proto[ver]);
-	if (!proto || !proto->err_handler)
-		goto drop_unlock;
-	proto->err_handler(skb, info);
-	rcu_read_unlock();
-	return;
-
-drop_unlock:
+	if (proto && proto->err_handler)
+		proto->err_handler(skb, info);
 	rcu_read_unlock();
-drop:
-	kfree_skb(skb);
 }
 
 static const struct net_protocol net_gre_protocol = {
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 5395e45dcce..23ef31baa1a 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -380,6 +380,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
 					struct icmp_bxm *param)
 {
 	struct rtable *rt, *rt2;
+	struct flowi4 fl4_dec;
 	int err;
 
 	memset(fl4, 0, sizeof(*fl4));
@@ -408,19 +409,19 @@ static struct rtable *icmp_route_lookup(struct net *net,
 	} else
 		return rt;
 
-	err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(fl4), AF_INET);
+	err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(&fl4_dec), AF_INET);
 	if (err)
 		goto relookup_failed;
 
-	if (inet_addr_type(net, fl4->saddr) == RTN_LOCAL) {
-		rt2 = __ip_route_output_key(net, fl4);
+	if (inet_addr_type(net, fl4_dec.saddr) == RTN_LOCAL) {
+		rt2 = __ip_route_output_key(net, &fl4_dec);
 		if (IS_ERR(rt2))
 			err = PTR_ERR(rt2);
 	} else {
 		struct flowi4 fl4_2 = {};
 		unsigned long orefdst;
 
-		fl4_2.daddr = fl4->saddr;
+		fl4_2.daddr = fl4_dec.saddr;
 		rt2 = ip_route_output_key(net, &fl4_2);
 		if (IS_ERR(rt2)) {
 			err = PTR_ERR(rt2);
@@ -428,7 +429,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
 		}
 		/* Ugh! */
 		orefdst = skb_in->_skb_refdst; /* save old refdst */
-		err = ip_route_input(skb_in, fl4->daddr, fl4->saddr,
+		err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr,
 				     RT_TOS(tos), rt2->dst.dev);
 
 		dst_release(&rt2->dst);
@@ -440,10 +441,11 @@ static struct rtable *icmp_route_lookup(struct net *net,
 		goto relookup_failed;
 
 	rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst,
-					    flowi4_to_flowi(fl4), NULL,
+					    flowi4_to_flowi(&fl4_dec), NULL,
 					    XFRM_LOOKUP_ICMP);
 	if (!IS_ERR(rt2)) {
 		dst_release(&rt->dst);
+		memcpy(fl4, &fl4_dec, sizeof(*fl4));
 		rt = rt2;
 	} else if (PTR_ERR(rt2) == -EPERM) {
 		if (rt)
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index f1d27f6c935..e0d42dbb33f 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -767,7 +767,7 @@ static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs)
 			break;
 		for (i=0; i<nsrcs; i++) {
 			/* skip inactive filters */
-			if (pmc->sfcount[MCAST_INCLUDE] ||
+			if (psf->sf_count[MCAST_INCLUDE] ||
 			    pmc->sfcount[MCAST_EXCLUDE] !=
 			    psf->sf_count[MCAST_EXCLUDE])
 				continue;
@@ -875,6 +875,8 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
 		 * to be intended in a v3 query.
 		 */
 		max_delay = IGMPV3_MRC(ih3->code)*(HZ/IGMP_TIMER_SCALE);
+		if (!max_delay)
+			max_delay = 1;	/* can't mod w/ 0 */
 	} else { /* v3 */
 		if (!pskb_may_pull(skb, sizeof(struct igmpv3_query)))
 			return;
@@ -1718,7 +1720,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
 
 		pmc->sfcount[sfmode]--;
 		for (j=0; j<i; j++)
-			(void) ip_mc_del1_src(pmc, sfmode, &psfsrc[i]);
+			(void) ip_mc_del1_src(pmc, sfmode, &psfsrc[j]);
 	} else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) {
 #ifdef CONFIG_IP_MULTICAST
 		struct ip_sf_list *psf;
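Note (annotation, not part of the patch): the max_delay clamp matters because the value is later used as a modulus when randomizing the report timer, so a query whose max-response code decodes to zero jiffies would otherwise divide by zero. A sketch of the hazard (pick_report_delay() is a stand-in for igmp_start_timer()'s randomization):

	#include <stdlib.h>

	static unsigned long pick_report_delay(unsigned long max_delay)
	{
		if (!max_delay)
			max_delay = 1;			/* can't mod with 0 */
		return random() % max_delay;		/* stand-in for net_random() */
	}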
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 3267d389843..389a2e6a17f 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -869,7 +869,7 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		}
 
 		return netlink_dump_start(idiagnl, skb, nlh,
-					  inet_diag_dump, NULL);
+					  inet_diag_dump, NULL, 0);
 	}
 
 	return inet_diag_get_exact(skb, nlh);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 3c0369a3a66..984ec656b03 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -21,6 +21,7 @@
 
 #include <net/inet_connection_sock.h>
 #include <net/inet_hashtables.h>
+#include <net/secure_seq.h>
 #include <net/ip.h>
 
 /*
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
index 85a0f75dae6..ef7ae6049a5 100644
--- a/net/ipv4/inet_lro.c
+++ b/net/ipv4/inet_lro.c
@@ -146,8 +146,7 @@ static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len)
 }
 
 static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
-			  struct iphdr *iph, struct tcphdr *tcph,
-			  u16 vlan_tag, struct vlan_group *vgrp)
+			  struct iphdr *iph, struct tcphdr *tcph)
 {
 	int nr_frags;
 	__be32 *ptr;
@@ -173,8 +172,6 @@ static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
 	}
 
 	lro_desc->mss = tcp_data_len;
-	lro_desc->vgrp = vgrp;
-	lro_desc->vlan_tag = vlan_tag;
 	lro_desc->active = 1;
 
 	lro_desc->data_csum = lro_tcp_data_csum(iph, tcph,
@@ -309,29 +306,17 @@ static void lro_flush(struct net_lro_mgr *lro_mgr,
 
 	skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss;
 
-	if (lro_desc->vgrp) {
-		if (lro_mgr->features & LRO_F_NAPI)
-			vlan_hwaccel_receive_skb(lro_desc->parent,
-						 lro_desc->vgrp,
-						 lro_desc->vlan_tag);
-		else
-			vlan_hwaccel_rx(lro_desc->parent,
-					lro_desc->vgrp,
-					lro_desc->vlan_tag);
-
-	} else {
-		if (lro_mgr->features & LRO_F_NAPI)
-			netif_receive_skb(lro_desc->parent);
-		else
-			netif_rx(lro_desc->parent);
-	}
+	if (lro_mgr->features & LRO_F_NAPI)
+		netif_receive_skb(lro_desc->parent);
+	else
+		netif_rx(lro_desc->parent);
 
 	LRO_INC_STATS(lro_mgr, flushed);
 	lro_clear_desc(lro_desc);
 }
 
 static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
-			  struct vlan_group *vgrp, u16 vlan_tag, void *priv)
+			  void *priv)
 {
 	struct net_lro_desc *lro_desc;
 	struct iphdr *iph;
@@ -360,7 +345,7 @@ static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
 			goto out;
 
 		skb->ip_summed = lro_mgr->ip_summed_aggr;
-		lro_init_desc(lro_desc, skb, iph, tcph, vlan_tag, vgrp);
+		lro_init_desc(lro_desc, skb, iph, tcph);
 		LRO_INC_STATS(lro_mgr, aggregated);
 		return 0;
 	}
@@ -433,8 +418,7 @@ static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr,
 static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr,
 					  struct skb_frag_struct *frags,
 					  int len, int true_size,
-					  struct vlan_group *vgrp,
-					  u16 vlan_tag, void *priv, __wsum sum)
+					  void *priv, __wsum sum)
 {
 	struct net_lro_desc *lro_desc;
 	struct iphdr *iph;
@@ -480,7 +464,7 @@ static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr,
 		tcph = (void *)((u8 *)skb->data + vlan_hdr_len
 				+ IP_HDR_LEN(iph));
 
-		lro_init_desc(lro_desc, skb, iph, tcph, 0, NULL);
+		lro_init_desc(lro_desc, skb, iph, tcph);
 		LRO_INC_STATS(lro_mgr, aggregated);
 		return NULL;
 	}
@@ -514,7 +498,7 @@ void lro_receive_skb(struct net_lro_mgr *lro_mgr,
 		     struct sk_buff *skb,
 		     void *priv)
 {
-	if (__lro_proc_skb(lro_mgr, skb, NULL, 0, priv)) {
+	if (__lro_proc_skb(lro_mgr, skb, priv)) {
 		if (lro_mgr->features & LRO_F_NAPI)
 			netif_receive_skb(skb);
 		else
@@ -523,29 +507,13 @@ void lro_receive_skb(struct net_lro_mgr *lro_mgr,
 }
 EXPORT_SYMBOL(lro_receive_skb);
 
-void lro_vlan_hwaccel_receive_skb(struct net_lro_mgr *lro_mgr,
-				  struct sk_buff *skb,
-				  struct vlan_group *vgrp,
-				  u16 vlan_tag,
-				  void *priv)
-{
-	if (__lro_proc_skb(lro_mgr, skb, vgrp, vlan_tag, priv)) {
-		if (lro_mgr->features & LRO_F_NAPI)
-			vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag);
-		else
-			vlan_hwaccel_rx(skb, vgrp, vlan_tag);
-	}
-}
-EXPORT_SYMBOL(lro_vlan_hwaccel_receive_skb);
-
 void lro_receive_frags(struct net_lro_mgr *lro_mgr,
 		       struct skb_frag_struct *frags,
 		       int len, int true_size, void *priv, __wsum sum)
 {
 	struct sk_buff *skb;
 
-	skb = __lro_proc_segment(lro_mgr, frags, len, true_size, NULL, 0,
-				 priv, sum);
+	skb = __lro_proc_segment(lro_mgr, frags, len, true_size, priv, sum);
 	if (!skb)
 		return;
 
@@ -556,26 +524,6 @@ void lro_receive_frags(struct net_lro_mgr *lro_mgr,
 }
 EXPORT_SYMBOL(lro_receive_frags);
 
-void lro_vlan_hwaccel_receive_frags(struct net_lro_mgr *lro_mgr,
-				    struct skb_frag_struct *frags,
-				    int len, int true_size,
-				    struct vlan_group *vgrp,
-				    u16 vlan_tag, void *priv, __wsum sum)
-{
-	struct sk_buff *skb;
-
-	skb = __lro_proc_segment(lro_mgr, frags, len, true_size, vgrp,
-				 vlan_tag, priv, sum);
-	if (!skb)
-		return;
-
-	if (lro_mgr->features & LRO_F_NAPI)
-		vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag);
-	else
-		vlan_hwaccel_rx(skb, vgrp, vlan_tag);
-}
-EXPORT_SYMBOL(lro_vlan_hwaccel_receive_frags);
-
 void lro_flush_all(struct net_lro_mgr *lro_mgr)
 {
 	int i;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index ce616d92cc5..86f13c67ea8 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -19,6 +19,7 @@
 #include <linux/net.h>
 #include <net/ip.h>
 #include <net/inetpeer.h>
+#include <net/secure_seq.h>
 
 /*
  * Theory of operations.
@@ -54,15 +55,11 @@
  *		1.  Nodes may appear in the tree only with the pool lock held.
  *		2.  Nodes may disappear from the tree only with the pool lock held
  *		    AND reference count being 0.
- *		3.  Nodes appears and disappears from unused node list only under
- *		    "inet_peer_unused_lock".
- *		4.  Global variable peer_total is modified under the pool lock.
- *		5.  struct inet_peer fields modification:
+ *		3.  Global variable peer_total is modified under the pool lock.
+ *		4.  struct inet_peer fields modification:
  *		avl_left, avl_right, avl_parent, avl_height: pool lock
- *		unused: unused node list lock
  *		refcnt: atomically against modifications on other CPU;
  *		   usually under some other lock to prevent node disappearing
- *		dtime: unused node list lock
  *		daddr: unchangeable
  *		ip_id_count: atomic value (no lock needed)
  */
@@ -104,19 +101,6 @@ int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries m
 					 * aggressively at this stage */
 int inet_peer_minttl __read_mostly = 120 * HZ;	/* TTL under high load: 120 sec */
 int inet_peer_maxttl __read_mostly = 10 * 60 * HZ;	/* usual time to live: 10 min */
-int inet_peer_gc_mintime __read_mostly = 10 * HZ;
-int inet_peer_gc_maxtime __read_mostly = 120 * HZ;
-
-static struct {
-	struct list_head	list;
-	spinlock_t		lock;
-} unused_peers = {
-	.list			= LIST_HEAD_INIT(unused_peers.list),
-	.lock			= __SPIN_LOCK_UNLOCKED(unused_peers.lock),
-};
-
-static void peer_check_expire(unsigned long dummy);
-static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0);
 
 
 /* Called from ip_output.c:ip_init */
@@ -142,21 +126,6 @@ void __init inet_initpeers(void)
 			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
 			NULL);
 
-	/* All the timers, started at system startup tend
-	   to synchronize. Perturb it a bit.
-	 */
-	peer_periodic_timer.expires = jiffies
-		+ net_random() % inet_peer_gc_maxtime
-		+ inet_peer_gc_maxtime;
-	add_timer(&peer_periodic_timer);
-}
-
-/* Called with or without local BH being disabled. */
-static void unlink_from_unused(struct inet_peer *p)
-{
-	spin_lock_bh(&unused_peers.lock);
-	list_del_init(&p->unused);
-	spin_unlock_bh(&unused_peers.lock);
 }
 
 static int addr_compare(const struct inetpeer_addr *a,
@@ -203,20 +172,6 @@ static int addr_compare(const struct inetpeer_addr *a,
 	u;					\
 })
 
-static bool atomic_add_unless_return(atomic_t *ptr, int a, int u, int *newv)
-{
-	int cur, old = atomic_read(ptr);
-
-	while (old != u) {
-		*newv = old + a;
-		cur = atomic_cmpxchg(ptr, old, *newv);
-		if (cur == old)
-			return true;
-		old = cur;
-	}
-	return false;
-}
-
 /*
  * Called with rcu_read_lock()
  * Because we hold no lock against a writer, its quite possible we fall
@@ -225,8 +180,7 @@ static bool atomic_add_unless_return(atomic_t *ptr, int a, int u, int *newv)
  * We exit from this function if number of links exceeds PEER_MAXDEPTH
  */
 static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
-				    struct inet_peer_base *base,
-				    int *newrefcnt)
+				    struct inet_peer_base *base)
 {
 	struct inet_peer *u = rcu_dereference(base->root);
 	int count = 0;
@@ -235,11 +189,9 @@ static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
 		int cmp = addr_compare(daddr, &u->daddr);
 		if (cmp == 0) {
 			/* Before taking a reference, check if this entry was
-			 * deleted, unlink_from_pool() sets refcnt=-1 to make
-			 * distinction between an unused entry (refcnt=0) and
-			 * a freed one.
+			 * deleted (refcnt=-1)
 			 */
-			if (!atomic_add_unless_return(&u->refcnt, 1, -1, newrefcnt))
+			if (!atomic_add_unless(&u->refcnt, 1, -1))
 				u = NULL;
 			return u;
 		}
@@ -366,137 +318,99 @@ static void inetpeer_free_rcu(struct rcu_head *head)
 	kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu));
 }
 
-/* May be called with local BH enabled. */
 static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base,
 			     struct inet_peer __rcu **stack[PEER_MAXDEPTH])
 {
-	int do_free;
-
-	do_free = 0;
-
-	write_seqlock_bh(&base->lock);
-	/* Check the reference counter.  It was artificially incremented by 1
-	 * in cleanup() function to prevent sudden disappearing.  If we can
-	 * atomically (because of lockless readers) take this last reference,
-	 * it's safe to remove the node and free it later.
-	 * We use refcnt=-1 to alert lockless readers this entry is deleted.
-	 */
-	if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) {
-		struct inet_peer __rcu ***stackptr, ***delp;
-		if (lookup(&p->daddr, stack, base) != p)
-			BUG();
-		delp = stackptr - 1; /* *delp[0] == p */
-		if (p->avl_left == peer_avl_empty_rcu) {
-			*delp[0] = p->avl_right;
-			--stackptr;
-		} else {
-			/* look for a node to insert instead of p */
-			struct inet_peer *t;
-			t = lookup_rightempty(p, base);
-			BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t);
-			**--stackptr = t->avl_left;
-			/* t is removed, t->daddr > x->daddr for any
-			 * x in p->avl_left subtree.
-			 * Put t in the old place of p. */
-			RCU_INIT_POINTER(*delp[0], t);
-			t->avl_left = p->avl_left;
-			t->avl_right = p->avl_right;
-			t->avl_height = p->avl_height;
-			BUG_ON(delp[1] != &p->avl_left);
-			delp[1] = &t->avl_left; /* was &p->avl_left */
-		}
-		peer_avl_rebalance(stack, stackptr, base);
-		base->total--;
-		do_free = 1;
-	}
-	write_sequnlock_bh(&base->lock);
-
-	if (do_free)
-		call_rcu(&p->rcu, inetpeer_free_rcu);
-	else
-		/* The node is used again.  Decrease the reference counter
-		 * back.  The loop "cleanup -> unlink_from_unused
-		 *   -> unlink_from_pool -> putpeer -> link_to_unused
-		 *   -> cleanup (for the same node)"
-		 * doesn't really exist because the entry will have a
-		 * recent deletion time and will not be cleaned again soon.
-		 */
-		inet_putpeer(p);
+	struct inet_peer __rcu ***stackptr, ***delp;
+
+	if (lookup(&p->daddr, stack, base) != p)
+		BUG();
+	delp = stackptr - 1; /* *delp[0] == p */
+	if (p->avl_left == peer_avl_empty_rcu) {
+		*delp[0] = p->avl_right;
+		--stackptr;
+	} else {
+		/* look for a node to insert instead of p */
+		struct inet_peer *t;
+		t = lookup_rightempty(p, base);
+		BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t);
+		**--stackptr = t->avl_left;
+		/* t is removed, t->daddr > x->daddr for any
+		 * x in p->avl_left subtree.
+		 * Put t in the old place of p. */
+		RCU_INIT_POINTER(*delp[0], t);
+		t->avl_left = p->avl_left;
+		t->avl_right = p->avl_right;
+		t->avl_height = p->avl_height;
+		BUG_ON(delp[1] != &p->avl_left);
+		delp[1] = &t->avl_left; /* was &p->avl_left */
+	}
+	peer_avl_rebalance(stack, stackptr, base);
+	base->total--;
+	call_rcu(&p->rcu, inetpeer_free_rcu);
 }
 
 static struct inet_peer_base *family_to_base(int family)
 {
-	return (family == AF_INET ? &v4_peers : &v6_peers);
-}
-
-static struct inet_peer_base *peer_to_base(struct inet_peer *p)
-{
-	return family_to_base(p->daddr.family);
+	return family == AF_INET ? &v4_peers : &v6_peers;
 }
 
-/* May be called with local BH enabled. */
-static int cleanup_once(unsigned long ttl, struct inet_peer __rcu **stack[PEER_MAXDEPTH])
+/* perform garbage collect on all items stacked during a lookup */
+static int inet_peer_gc(struct inet_peer_base *base,
+			struct inet_peer __rcu **stack[PEER_MAXDEPTH],
+			struct inet_peer __rcu ***stackptr)
 {
-	struct inet_peer *p = NULL;
-
-	/* Remove the first entry from the list of unused nodes. */
-	spin_lock_bh(&unused_peers.lock);
-	if (!list_empty(&unused_peers.list)) {
-		__u32 delta;
-
-		p = list_first_entry(&unused_peers.list, struct inet_peer, unused);
-		delta = (__u32)jiffies - p->dtime;
+	struct inet_peer *p, *gchead = NULL;
+	__u32 delta, ttl;
+	int cnt = 0;
 
-		if (delta < ttl) {
-			/* Do not prune fresh entries. */
-			spin_unlock_bh(&unused_peers.lock);
-			return -1;
+	if (base->total >= inet_peer_threshold)
+		ttl = 0; /* be aggressive */
+	else
+		ttl = inet_peer_maxttl
+				- (inet_peer_maxttl - inet_peer_minttl) / HZ *
+					base->total / inet_peer_threshold * HZ;
+	stackptr--; /* last stack slot is peer_avl_empty */
+	while (stackptr > stack) {
+		stackptr--;
+		p = rcu_deref_locked(**stackptr, base);
+		if (atomic_read(&p->refcnt) == 0) {
+			smp_rmb();
+			delta = (__u32)jiffies - p->dtime;
+			if (delta >= ttl &&
+			    atomic_cmpxchg(&p->refcnt, 0, -1) == 0) {
+				p->gc_next = gchead;
+				gchead = p;
+			}
 		}
-
-		list_del_init(&p->unused);
-
-		/* Grab an extra reference to prevent node disappearing
-		 * before unlink_from_pool() call. */
-		atomic_inc(&p->refcnt);
 	}
-	spin_unlock_bh(&unused_peers.lock);
-
-	if (p == NULL)
-		/* It means that the total number of USED entries has
-		 * grown over inet_peer_threshold.  It shouldn't really
-		 * happen because of entry limits in route cache. */
-		return -1;
-
-	unlink_from_pool(p, peer_to_base(p), stack);
-	return 0;
+	while ((p = gchead) != NULL) {
+		gchead = p->gc_next;
+		cnt++;
+		unlink_from_pool(p, base, stack);
+	}
+	return cnt;
 }
 
-/* Called with or without local BH being disabled. */
-struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
+struct inet_peer *inet_getpeer(const struct inetpeer_addr *daddr, int create)
 {
 	struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
 	struct inet_peer_base *base = family_to_base(daddr->family);
 	struct inet_peer *p;
 	unsigned int sequence;
-	int invalidated, newrefcnt = 0;
+	int invalidated, gccnt = 0;
 
-	/* Look up for the address quickly, lockless.
+	/* Attempt a lockless lookup first.
 	 * Because of a concurrent writer, we might not find an existing entry.
 	 */
 	rcu_read_lock();
 	sequence = read_seqbegin(&base->lock);
-	p = lookup_rcu(daddr, base, &newrefcnt);
+	p = lookup_rcu(daddr, base);
 	invalidated = read_seqretry(&base->lock, sequence);
 	rcu_read_unlock();
 
-	if (p) {
-found:		/* The existing node has been found.
-		 * Remove the entry from unused list if it was there.
-		 */
-		if (newrefcnt == 1)
-			unlink_from_unused(p);
+	if (p)
 		return p;
-	}
 
 	/* If no writer did a change during our lookup, we can return early. */
 	if (!create && !invalidated)
@@ -506,18 +420,27 @@ found: /* The existing node has been found.
 	 * At least, nodes should be hot in our cache.
 	 */
 	write_seqlock_bh(&base->lock);
+relookup:
 	p = lookup(daddr, stack, base);
 	if (p != peer_avl_empty) {
-		newrefcnt = atomic_inc_return(&p->refcnt);
+		atomic_inc(&p->refcnt);
 		write_sequnlock_bh(&base->lock);
-		goto found;
+		return p;
+	}
+	if (!gccnt) {
+		gccnt = inet_peer_gc(base, stack, stackptr);
+		if (gccnt && create)
+			goto relookup;
 	}
 	p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
 	if (p) {
 		p->daddr = *daddr;
 		atomic_set(&p->refcnt, 1);
 		atomic_set(&p->rid, 0);
-		atomic_set(&p->ip_id_count, secure_ip_id(daddr->addr.a4));
+		atomic_set(&p->ip_id_count,
+				(daddr->family == AF_INET) ?
+					secure_ip_id(daddr->addr.a4) :
+					secure_ipv6_id(daddr->addr.a6));
 		p->tcp_ts_stamp = 0;
 		p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
 		p->rate_tokens = 0;
@@ -525,7 +448,6 @@ found: /* The existing node has been found.
 		p->pmtu_expires = 0;
 		p->pmtu_orig = 0;
 		memset(&p->redirect_learned, 0, sizeof(p->redirect_learned));
-		INIT_LIST_HEAD(&p->unused);
 
 
 		/* Link the node. */
@@ -534,63 +456,15 @@ found: /* The existing node has been found.
534 } 456 }
535 write_sequnlock_bh(&base->lock); 457 write_sequnlock_bh(&base->lock);
536 458
537 if (base->total >= inet_peer_threshold)
538 /* Remove one less-recently-used entry. */
539 cleanup_once(0, stack);
540
541 return p; 459 return p;
542} 460}
543
544static int compute_total(void)
545{
546 return v4_peers.total + v6_peers.total;
547}
548EXPORT_SYMBOL_GPL(inet_getpeer); 461EXPORT_SYMBOL_GPL(inet_getpeer);
549 462
550/* Called with local BH disabled. */
551static void peer_check_expire(unsigned long dummy)
552{
553 unsigned long now = jiffies;
554 int ttl, total;
555 struct inet_peer __rcu **stack[PEER_MAXDEPTH];
556
557 total = compute_total();
558 if (total >= inet_peer_threshold)
559 ttl = inet_peer_minttl;
560 else
561 ttl = inet_peer_maxttl
562 - (inet_peer_maxttl - inet_peer_minttl) / HZ *
563 total / inet_peer_threshold * HZ;
564 while (!cleanup_once(ttl, stack)) {
565 if (jiffies != now)
566 break;
567 }
568
569 /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime
570 * interval depending on the total number of entries (more entries,
571 * less interval). */
572 total = compute_total();
573 if (total >= inet_peer_threshold)
574 peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime;
575 else
576 peer_periodic_timer.expires = jiffies
577 + inet_peer_gc_maxtime
578 - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ *
579 total / inet_peer_threshold * HZ;
580 add_timer(&peer_periodic_timer);
581}
582
583void inet_putpeer(struct inet_peer *p) 463void inet_putpeer(struct inet_peer *p)
584{ 464{
585 local_bh_disable(); 465 p->dtime = (__u32)jiffies;
586 466 smp_mb__before_atomic_dec();
587 if (atomic_dec_and_lock(&p->refcnt, &unused_peers.lock)) { 467 atomic_dec(&p->refcnt);
588 list_add_tail(&p->unused, &unused_peers.list);
589 p->dtime = (__u32)jiffies;
590 spin_unlock(&unused_peers.lock);
591 }
592
593 local_bh_enable();
594} 468}
595EXPORT_SYMBOL_GPL(inet_putpeer); 469EXPORT_SYMBOL_GPL(inet_putpeer);
596 470
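Note (annotation, not part of the patch): this replaces the periodic timer plus unused-node list with opportunistic garbage collection. An insert-path lookup already stacks every node it visits, so inet_peer_gc() sweeps that stack and claims only nodes whose refcount it can atomically swing from 0 to -1; lookup_rcu()'s atomic_add_unless(&refcnt, 1, -1) then refuses to revive them. The handshake in isolation (a stand-alone sketch using the same kernel atomics):

	static bool try_claim_for_free(atomic_t *refcnt)
	{
		/* only an idle node (refcnt == 0) may become dead (refcnt == -1) */
		return atomic_cmpxchg(refcnt, 0, -1) == 0;
	}

With the defaults above (threshold 65664, minttl 120 s, maxttl 600 s), a pool at half the threshold prunes entries idle longer than roughly 600 - 480/2 = 360 seconds, while a full pool prunes aggressively with ttl = 0.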
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 0ad6035f636..0e0ab98abc6 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -261,8 +261,9 @@ static void ip_expire(unsigned long arg)
 	 * Only an end host needs to send an ICMP
 	 * "Fragment Reassembly Timeout" message, per RFC792.
 	 */
-	if (qp->user == IP_DEFRAG_CONNTRACK_IN &&
-	    skb_rtable(head)->rt_type != RTN_LOCAL)
+	if (qp->user == IP_DEFRAG_AF_PACKET ||
+	    (qp->user == IP_DEFRAG_CONNTRACK_IN &&
+	     skb_rtable(head)->rt_type != RTN_LOCAL))
 		goto out_rcu_unlock;
 
 
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 8871067560d..d7bb94c4834 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -731,9 +731,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 	}
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	else if (skb->protocol == htons(ETH_P_IPV6)) {
+		struct neighbour *neigh = dst_get_neighbour(skb_dst(skb));
 		const struct in6_addr *addr6;
 		int addr_type;
-		struct neighbour *neigh = skb_dst(skb)->neighbour;
 
 		if (neigh == NULL)
 			goto tx_error;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index c8f48efc5fd..073a9b01c40 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -165,7 +165,7 @@ int ip_call_ra_chain(struct sk_buff *skb)
 		    (!sk->sk_bound_dev_if ||
 		     sk->sk_bound_dev_if == dev->ifindex) &&
 		    net_eq(sock_net(sk), dev_net(dev))) {
-			if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
+			if (ip_is_fragment(ip_hdr(skb))) {
 				if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN))
 					return 1;
 			}
@@ -256,7 +256,7 @@ int ip_local_deliver(struct sk_buff *skb)
 	 *	Reassemble IP fragments.
 	 */
 
-	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
+	if (ip_is_fragment(ip_hdr(skb))) {
 		if (ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER))
 			return 0;
 	}
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index ec93335901d..05d20cca9d6 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -640,6 +640,7 @@ int ip_options_rcv_srr(struct sk_buff *skb)
 	}
 	if (srrptr <= srrspace) {
 		opt->srr_is_hit = 1;
+		iph->daddr = nexthop;
 		opt->is_changed = 1;
 	}
 	return 0;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 84f26e8e6c6..8c6563361ab 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -122,6 +122,7 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb)
 	newskb->pkt_type = PACKET_LOOPBACK;
 	newskb->ip_summed = CHECKSUM_UNNECESSARY;
 	WARN_ON(!skb_dst(newskb));
+	skb_dst_force(newskb);
 	netif_rx_ni(newskb);
 	return 0;
 }
@@ -182,6 +183,7 @@ static inline int ip_finish_output2(struct sk_buff *skb)
182 struct rtable *rt = (struct rtable *)dst; 183 struct rtable *rt = (struct rtable *)dst;
183 struct net_device *dev = dst->dev; 184 struct net_device *dev = dst->dev;
184 unsigned int hh_len = LL_RESERVED_SPACE(dev); 185 unsigned int hh_len = LL_RESERVED_SPACE(dev);
186 struct neighbour *neigh;
185 187
186 if (rt->rt_type == RTN_MULTICAST) { 188 if (rt->rt_type == RTN_MULTICAST) {
187 IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len); 189 IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len);
@@ -203,10 +205,15 @@ static inline int ip_finish_output2(struct sk_buff *skb)
203 skb = skb2; 205 skb = skb2;
204 } 206 }
205 207
206 if (dst->hh) 208 rcu_read_lock();
207 return neigh_hh_output(dst->hh, skb); 209 neigh = dst_get_neighbour(dst);
208 else if (dst->neighbour) 210 if (neigh) {
209 return dst->neighbour->output(skb); 211 int res = neigh_output(neigh, skb);
212
213 rcu_read_unlock();
214 return res;
215 }
216 rcu_read_unlock();
210 217
211 if (net_ratelimit()) 218 if (net_ratelimit())
212 printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n"); 219 printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
@@ -489,7 +496,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
489 496
490 if (first_len - hlen > mtu || 497 if (first_len - hlen > mtu ||
491 ((first_len - hlen) & 7) || 498 ((first_len - hlen) & 7) ||
492 (iph->frag_off & htons(IP_MF|IP_OFFSET)) || 499 ip_is_fragment(iph) ||
493 skb_cloned(skb)) 500 skb_cloned(skb))
494 goto slow_path; 501 goto slow_path;
495 502
@@ -734,7 +741,7 @@ static inline int ip_ufo_append_data(struct sock *sk,
734 int getfrag(void *from, char *to, int offset, int len, 741 int getfrag(void *from, char *to, int offset, int len,
735 int odd, struct sk_buff *skb), 742 int odd, struct sk_buff *skb),
736 void *from, int length, int hh_len, int fragheaderlen, 743 void *from, int length, int hh_len, int fragheaderlen,
737 int transhdrlen, int mtu, unsigned int flags) 744 int transhdrlen, int maxfraglen, unsigned int flags)
738{ 745{
739 struct sk_buff *skb; 746 struct sk_buff *skb;
740 int err; 747 int err;
@@ -767,7 +774,7 @@ static inline int ip_ufo_append_data(struct sock *sk,
767 skb->csum = 0; 774 skb->csum = 0;
768 775
769 /* specify the length of each IP datagram fragment */ 776 /* specify the length of each IP datagram fragment */
770 skb_shinfo(skb)->gso_size = mtu - fragheaderlen; 777 skb_shinfo(skb)->gso_size = maxfraglen - fragheaderlen;
771 skb_shinfo(skb)->gso_type = SKB_GSO_UDP; 778 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
772 __skb_queue_tail(queue, skb); 779 __skb_queue_tail(queue, skb);
773 } 780 }
@@ -831,7 +838,7 @@ static int __ip_append_data(struct sock *sk,
831 (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len) { 838 (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len) {
832 err = ip_ufo_append_data(sk, queue, getfrag, from, length, 839 err = ip_ufo_append_data(sk, queue, getfrag, from, length,
833 hh_len, fragheaderlen, transhdrlen, 840 hh_len, fragheaderlen, transhdrlen,
834 mtu, flags); 841 maxfraglen, flags);
835 if (err) 842 if (err)
836 goto error; 843 goto error;
837 return 0; 844 return 0;
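
The UFO hunks switch the advertised gso_size from mtu - fragheaderlen to maxfraglen - fragheaderlen. In __ip_append_data() (not shown in these hunks), maxfraglen is the largest fragment size whose payload is a multiple of 8 bytes, as the IP fragment-offset encoding requires; a worked example of that arithmetic, assuming the usual maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen definition:

#include <stdio.h>

int main(void)
{
        int mtu = 1500;
        int fragheaderlen = 20;                 /* IPv4 header, no options */
        /* per-fragment payload rounded down to a multiple of 8 bytes */
        int maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;

        printf("mtu=%d payload(old)=%d payload(new)=%d\n",
               mtu, mtu - fragheaderlen, maxfraglen - fragheaderlen);

        /* 1500 - 20 = 1480 is already a multiple of 8, but with IP
         * options (say fragheaderlen = 24) the old value 1476 is not: */
        fragheaderlen = 24;
        maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
        printf("with options: payload(old)=%d payload(new)=%d\n",
               mtu - fragheaderlen, maxfraglen - fragheaderlen);
        return 0;
}
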
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index ab0c9efd1ef..8905e92f896 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1067,7 +1067,7 @@ EXPORT_SYMBOL(compat_ip_setsockopt);
1067 */ 1067 */
1068 1068
1069static int do_ip_getsockopt(struct sock *sk, int level, int optname, 1069static int do_ip_getsockopt(struct sock *sk, int level, int optname,
1070 char __user *optval, int __user *optlen) 1070 char __user *optval, int __user *optlen, unsigned flags)
1071{ 1071{
1072 struct inet_sock *inet = inet_sk(sk); 1072 struct inet_sock *inet = inet_sk(sk);
1073 int val; 1073 int val;
@@ -1240,7 +1240,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
1240 1240
1241 msg.msg_control = optval; 1241 msg.msg_control = optval;
1242 msg.msg_controllen = len; 1242 msg.msg_controllen = len;
1243 msg.msg_flags = 0; 1243 msg.msg_flags = flags;
1244 1244
1245 if (inet->cmsg_flags & IP_CMSG_PKTINFO) { 1245 if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
1246 struct in_pktinfo info; 1246 struct in_pktinfo info;
@@ -1294,7 +1294,7 @@ int ip_getsockopt(struct sock *sk, int level,
1294{ 1294{
1295 int err; 1295 int err;
1296 1296
1297 err = do_ip_getsockopt(sk, level, optname, optval, optlen); 1297 err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0);
1298#ifdef CONFIG_NETFILTER 1298#ifdef CONFIG_NETFILTER
1299 /* we need to exclude all possible ENOPROTOOPTs except default case */ 1299 /* we need to exclude all possible ENOPROTOOPTs except default case */
1300 if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && 1300 if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
@@ -1327,7 +1327,8 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname,
1327 return compat_mc_getsockopt(sk, level, optname, optval, optlen, 1327 return compat_mc_getsockopt(sk, level, optname, optval, optlen,
1328 ip_getsockopt); 1328 ip_getsockopt);
1329 1329
1330 err = do_ip_getsockopt(sk, level, optname, optval, optlen); 1330 err = do_ip_getsockopt(sk, level, optname, optval, optlen,
1331 MSG_CMSG_COMPAT);
1331 1332
1332#ifdef CONFIG_NETFILTER 1333#ifdef CONFIG_NETFILTER
1333 /* we need to exclude all possible ENOPROTOOPTs except default case */ 1334 /* we need to exclude all possible ENOPROTOOPTs except default case */
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index ab7e5542c1c..004bb74b41c 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -252,6 +252,10 @@ static int __init ic_open_devs(void)
252 } 252 }
253 } 253 }
254 254
255 /* no point in waiting if we could not bring up at least one device */
256 if (!ic_first_dev)
257 goto have_carrier;
258
255 /* wait for a carrier on at least one device */ 259 /* wait for a carrier on at least one device */
256 start = jiffies; 260 start = jiffies;
257 while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) { 261 while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) {
@@ -861,41 +865,44 @@ static void __init ic_do_bootp_ext(u8 *ext)
861#endif 865#endif
862 866
863 switch (*ext++) { 867 switch (*ext++) {
864 case 1: /* Subnet mask */ 868 case 1: /* Subnet mask */
865 if (ic_netmask == NONE) 869 if (ic_netmask == NONE)
866 memcpy(&ic_netmask, ext+1, 4); 870 memcpy(&ic_netmask, ext+1, 4);
867 break; 871 break;
868 case 3: /* Default gateway */ 872 case 3: /* Default gateway */
869 if (ic_gateway == NONE) 873 if (ic_gateway == NONE)
870 memcpy(&ic_gateway, ext+1, 4); 874 memcpy(&ic_gateway, ext+1, 4);
871 break; 875 break;
872 case 6: /* DNS server */ 876 case 6: /* DNS server */
873 servers= *ext/4; 877 servers= *ext/4;
874 if (servers > CONF_NAMESERVERS_MAX) 878 if (servers > CONF_NAMESERVERS_MAX)
875 servers = CONF_NAMESERVERS_MAX; 879 servers = CONF_NAMESERVERS_MAX;
876 for (i = 0; i < servers; i++) { 880 for (i = 0; i < servers; i++) {
877 if (ic_nameservers[i] == NONE) 881 if (ic_nameservers[i] == NONE)
878 memcpy(&ic_nameservers[i], ext+1+4*i, 4); 882 memcpy(&ic_nameservers[i], ext+1+4*i, 4);
879 } 883 }
880 break; 884 break;
881 case 12: /* Host name */ 885 case 12: /* Host name */
882 ic_bootp_string(utsname()->nodename, ext+1, *ext, __NEW_UTS_LEN); 886 ic_bootp_string(utsname()->nodename, ext+1, *ext,
883 ic_host_name_set = 1; 887 __NEW_UTS_LEN);
884 break; 888 ic_host_name_set = 1;
885 case 15: /* Domain name (DNS) */ 889 break;
886 ic_bootp_string(ic_domain, ext+1, *ext, sizeof(ic_domain)); 890 case 15: /* Domain name (DNS) */
887 break; 891 ic_bootp_string(ic_domain, ext+1, *ext, sizeof(ic_domain));
888 case 17: /* Root path */ 892 break;
889 if (!root_server_path[0]) 893 case 17: /* Root path */
890 ic_bootp_string(root_server_path, ext+1, *ext, sizeof(root_server_path)); 894 if (!root_server_path[0])
891 break; 895 ic_bootp_string(root_server_path, ext+1, *ext,
892 case 26: /* Interface MTU */ 896 sizeof(root_server_path));
893 memcpy(&mtu, ext+1, sizeof(mtu)); 897 break;
894 ic_dev_mtu = ntohs(mtu); 898 case 26: /* Interface MTU */
895 break; 899 memcpy(&mtu, ext+1, sizeof(mtu));
896 case 40: /* NIS Domain name (_not_ DNS) */ 900 ic_dev_mtu = ntohs(mtu);
897 ic_bootp_string(utsname()->domainname, ext+1, *ext, __NEW_UTS_LEN); 901 break;
898 break; 902 case 40: /* NIS Domain name (_not_ DNS) */
903 ic_bootp_string(utsname()->domainname, ext+1, *ext,
904 __NEW_UTS_LEN);
905 break;
899 } 906 }
900} 907}
901 908
@@ -932,7 +939,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
932 goto drop; 939 goto drop;
933 940
934 /* Fragments are not supported */ 941 /* Fragments are not supported */
935 if (h->frag_off & htons(IP_OFFSET | IP_MF)) { 942 if (ip_is_fragment(h)) {
936 if (net_ratelimit()) 943 if (net_ratelimit())
937 printk(KERN_ERR "DHCP/BOOTP: Ignoring fragmented " 944 printk(KERN_ERR "DHCP/BOOTP: Ignoring fragmented "
938 "reply.\n"); 945 "reply.\n");
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 378b20b7ca6..6f06f7f39ea 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -285,6 +285,8 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
285 if (register_netdevice(dev) < 0) 285 if (register_netdevice(dev) < 0)
286 goto failed_free; 286 goto failed_free;
287 287
288 strcpy(nt->parms.name, dev->name);
289
288 dev_hold(dev); 290 dev_hold(dev);
289 ipip_tunnel_link(ipn, nt); 291 ipip_tunnel_link(ipn, nt);
290 return nt; 292 return nt;
@@ -759,7 +761,6 @@ static int ipip_tunnel_init(struct net_device *dev)
759 struct ip_tunnel *tunnel = netdev_priv(dev); 761 struct ip_tunnel *tunnel = netdev_priv(dev);
760 762
761 tunnel->dev = dev; 763 tunnel->dev = dev;
762 strcpy(tunnel->parms.name, dev->name);
763 764
764 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); 765 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
765 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); 766 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
@@ -825,6 +826,7 @@ static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
825static int __net_init ipip_init_net(struct net *net) 826static int __net_init ipip_init_net(struct net *net)
826{ 827{
827 struct ipip_net *ipn = net_generic(net, ipip_net_id); 828 struct ipip_net *ipn = net_generic(net, ipip_net_id);
829 struct ip_tunnel *t;
828 int err; 830 int err;
829 831
830 ipn->tunnels[0] = ipn->tunnels_wc; 832 ipn->tunnels[0] = ipn->tunnels_wc;
@@ -848,6 +850,9 @@ static int __net_init ipip_init_net(struct net *net)
848 if ((err = register_netdev(ipn->fb_tunnel_dev))) 850 if ((err = register_netdev(ipn->fb_tunnel_dev)))
849 goto err_reg_dev; 851 goto err_reg_dev;
850 852
853 t = netdev_priv(ipn->fb_tunnel_dev);
854
855 strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
851 return 0; 856 return 0;
852 857
853err_reg_dev: 858err_reg_dev:
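
The ipip hunks defer the parms.name copy until after register_netdevice(), since before registration dev->name can still be a template such as "tunl%d" that the core resolves to the lowest free index. A simplified userspace model of that resolution (the in-use list is hypothetical; the kernel does this in dev_alloc_name()):

#include <stdio.h>
#include <string.h>

#define IFNAMSIZ 16

/* Pick the lowest index i such that the expanded name is not in `used`. */
static void alloc_name(char *out, const char *tmpl,
                       const char used[][IFNAMSIZ], int nused)
{
        for (int i = 0; ; i++) {
                char cand[IFNAMSIZ];
                int taken = 0;

                snprintf(cand, sizeof(cand), tmpl, i);
                for (int j = 0; j < nused; j++)
                        if (!strcmp(cand, used[j]))
                                taken = 1;
                if (!taken) {
                        strcpy(out, cand);
                        return;
                }
        }
}

int main(void)
{
        const char used[][IFNAMSIZ] = { "tunl0", "tunl1" };
        char name[IFNAMSIZ];

        alloc_name(name, "tunl%d", used, 2);
        printf("resolved to %s\n", name);   /* tunl2 */
        return 0;
}
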
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 30a7763c400..58e87915797 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1796,7 +1796,7 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
1796 struct flowi4 fl4 = { 1796 struct flowi4 fl4 = {
1797 .daddr = iph->daddr, 1797 .daddr = iph->daddr,
1798 .saddr = iph->saddr, 1798 .saddr = iph->saddr,
1799 .flowi4_tos = iph->tos, 1799 .flowi4_tos = RT_TOS(iph->tos),
1800 .flowi4_oif = rt->rt_oif, 1800 .flowi4_oif = rt->rt_oif,
1801 .flowi4_iif = rt->rt_iif, 1801 .flowi4_iif = rt->rt_iif,
1802 .flowi4_mark = rt->rt_mark, 1802 .flowi4_mark = rt->rt_mark,
@@ -2544,7 +2544,8 @@ int __init ip_mr_init(void)
2544 goto add_proto_fail; 2544 goto add_proto_fail;
2545 } 2545 }
2546#endif 2546#endif
2547 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute); 2547 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE,
2548 NULL, ipmr_rtm_dumproute, NULL);
2548 return 0; 2549 return 0;
2549 2550
2550#ifdef CONFIG_IP_PIMSM_V2 2551#ifdef CONFIG_IP_PIMSM_V2
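
The ipmr fix wraps iph->tos in RT_TOS() before building the flow key, masking the byte down to the four RFC 1349 TOS bits so that ECN and precedence bits cannot produce spurious lookup keys. A quick demonstration of the mask (definitions as in include/net/route.h and linux/ip.h):

#include <stdint.h>
#include <stdio.h>

#define IPTOS_TOS_MASK 0x1E            /* RFC 1349 TOS bits */
#define RT_TOS(tos)    ((tos) & IPTOS_TOS_MASK)

int main(void)
{
        /* Same TOS bits, different stray low/precedence bits: */
        uint8_t a = 0x10;              /* low delay */
        uint8_t b = 0x11;              /* low delay + stray low bit */
        uint8_t c = 0xB0;              /* low delay + precedence bits set */

        printf("%#x %#x %#x\n", RT_TOS(a), RT_TOS(b), RT_TOS(c));
        /* all three print 0x10, so they share one routing key */
        return 0;
}
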
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 2e97e3ec1eb..929b27bdeb7 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -18,17 +18,15 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
18 struct rtable *rt; 18 struct rtable *rt;
19 struct flowi4 fl4 = {}; 19 struct flowi4 fl4 = {};
20 __be32 saddr = iph->saddr; 20 __be32 saddr = iph->saddr;
21 __u8 flags = 0; 21 __u8 flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0;
22 unsigned int hh_len; 22 unsigned int hh_len;
23 23
24 if (!skb->sk && addr_type != RTN_LOCAL) { 24 if (addr_type == RTN_UNSPEC)
25 if (addr_type == RTN_UNSPEC) 25 addr_type = inet_addr_type(net, saddr);
26 addr_type = inet_addr_type(net, saddr); 26 if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST)
27 if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST) 27 flags |= FLOWI_FLAG_ANYSRC;
28 flags |= FLOWI_FLAG_ANYSRC; 28 else
29 else 29 saddr = 0;
30 saddr = 0;
31 }
32 30
33 /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause 31 /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
34 * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook. 32 * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook.
@@ -38,7 +36,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
38 fl4.flowi4_tos = RT_TOS(iph->tos); 36 fl4.flowi4_tos = RT_TOS(iph->tos);
39 fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; 37 fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
40 fl4.flowi4_mark = skb->mark; 38 fl4.flowi4_mark = skb->mark;
41 fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : flags; 39 fl4.flowi4_flags = flags;
42 rt = ip_route_output_key(net, &fl4); 40 rt = ip_route_output_key(net, &fl4);
43 if (IS_ERR(rt)) 41 if (IS_ERR(rt))
44 return -1; 42 return -1;
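
The ip_route_me_harder() rewrite takes the socket's flow flags unconditionally and applies the ANYSRC/zero-saddr decision to socket-owned packets as well. A compact userspace restatement of the new decision logic; the RTN_* values and the inet_addr_type() stand-in below are simplified, not the kernel definitions:

#include <stdio.h>

enum { RTN_UNSPEC, RTN_UNICAST, RTN_LOCAL, RTN_BROADCAST };
#define FLOWI_FLAG_ANYSRC 0x01

/* Mirror of the rewritten prologue of ip_route_me_harder(). */
static unsigned route_flags(int addr_type, unsigned sk_flags,
                            unsigned *saddr)
{
        unsigned flags = sk_flags;

        if (addr_type == RTN_UNSPEC)
                addr_type = RTN_UNICAST;    /* stand-in for inet_addr_type() */
        if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST)
                flags |= FLOWI_FLAG_ANYSRC; /* keep the original saddr */
        else
                *saddr = 0;                 /* let routing pick a source */
        return flags;
}

int main(void)
{
        unsigned saddr = 0x0a000001;
        unsigned f = route_flags(RTN_BROADCAST, 0, &saddr);

        printf("flags=%#x saddr=%#x\n", f, saddr);   /* saddr cleared */
        return 0;
}
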
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 1dfc18a03fd..73b4e91a87e 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -113,6 +113,18 @@ config IP_NF_TARGET_REJECT
113 113
114 To compile it as a module, choose M here. If unsure, say N. 114 To compile it as a module, choose M here. If unsure, say N.
115 115
116config IP_NF_TARGET_REJECT_SKERR
117 bool "Force socket error when rejecting with icmp*"
118 depends on IP_NF_TARGET_REJECT
119 default n
120 help
 121 This option additionally turns a "--reject-with icmp*" reject into
 122 a matching error on the local socket.
 123 The REJECT target normally only sends an ICMP message, leaving the
 124 local socket unaware of any ingress rejects.
125
126 If unsure, say N.
127
116config IP_NF_TARGET_LOG 128config IP_NF_TARGET_LOG
117 tristate "LOG target support" 129 tristate "LOG target support"
118 default m if NETFILTER_ADVANCED=n 130 default m if NETFILTER_ADVANCED=n
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 5c9b9d96391..e59aabd0eae 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -218,6 +218,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
218 return skb; 218 return skb;
219 219
220nlmsg_failure: 220nlmsg_failure:
221 kfree_skb(skb);
221 *errp = -EINVAL; 222 *errp = -EINVAL;
222 printk(KERN_ERR "ip_queue: error creating packet message\n"); 223 printk(KERN_ERR "ip_queue: error creating packet message\n");
223 return NULL; 224 return NULL;
@@ -313,7 +314,7 @@ ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
313{ 314{
314 struct nf_queue_entry *entry; 315 struct nf_queue_entry *entry;
315 316
316 if (vmsg->value > NF_MAX_VERDICT) 317 if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN)
317 return -EINVAL; 318 return -EINVAL;
318 319
319 entry = ipq_find_dequeue_entry(vmsg->id); 320 entry = ipq_find_dequeue_entry(vmsg->id);
@@ -358,12 +359,9 @@ ipq_receive_peer(struct ipq_peer_msg *pmsg,
358 break; 359 break;
359 360
360 case IPQM_VERDICT: 361 case IPQM_VERDICT:
361 if (pmsg->msg.verdict.value > NF_MAX_VERDICT) 362 status = ipq_set_verdict(&pmsg->msg.verdict,
362 status = -EINVAL; 363 len - sizeof(*pmsg));
363 else 364 break;
364 status = ipq_set_verdict(&pmsg->msg.verdict,
365 len - sizeof(*pmsg));
366 break;
367 default: 365 default:
368 status = -EINVAL; 366 status = -EINVAL;
369 } 367 }
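
Besides centralizing the length check in ipq_set_verdict(), the ip_queue change rejects NF_STOLEN from userspace, since a stolen verdict would leak the queued skb. A sketch of the validation, using the verdict values from linux/netfilter.h:

#include <stdio.h>

/* Verdict values as in <linux/netfilter.h>. */
enum { NF_DROP, NF_ACCEPT, NF_STOLEN, NF_QUEUE, NF_REPEAT, NF_STOP };
#define NF_MAX_VERDICT NF_STOP

static int verdict_ok(unsigned value)
{
        return value <= NF_MAX_VERDICT && value != NF_STOLEN;
}

int main(void)
{
        printf("ACCEPT:%d STOLEN:%d 99:%d\n",
               verdict_ok(NF_ACCEPT), verdict_ok(NF_STOLEN),
               verdict_ok(99));     /* 1 0 0 */
        return 0;
}
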
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 5c9e97c7901..db8d22db425 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -317,19 +317,19 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
317 hash = clusterip_hashfn(skb, cipinfo->config); 317 hash = clusterip_hashfn(skb, cipinfo->config);
318 318
319 switch (ctinfo) { 319 switch (ctinfo) {
320 case IP_CT_NEW: 320 case IP_CT_NEW:
321 ct->mark = hash; 321 ct->mark = hash;
322 break; 322 break;
323 case IP_CT_RELATED: 323 case IP_CT_RELATED:
324 case IP_CT_RELATED_REPLY: 324 case IP_CT_RELATED_REPLY:
325 /* FIXME: we don't handle expectations at the 325 /* FIXME: we don't handle expectations at the moment.
326 * moment. they can arrive on a different node than 326 * They can arrive on a different node than
327 * the master connection (e.g. FTP passive mode) */ 327 * the master connection (e.g. FTP passive mode) */
328 case IP_CT_ESTABLISHED: 328 case IP_CT_ESTABLISHED:
329 case IP_CT_ESTABLISHED_REPLY: 329 case IP_CT_ESTABLISHED_REPLY:
330 break; 330 break;
331 default: 331 default: /* Prevent gcc warnings */
332 break; 332 break;
333 } 333 }
334 334
335#ifdef DEBUG 335#ifdef DEBUG
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 51f13f8ec72..9dd754c7f2b 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -128,6 +128,14 @@ static void send_reset(struct sk_buff *oldskb, int hook)
128static inline void send_unreach(struct sk_buff *skb_in, int code) 128static inline void send_unreach(struct sk_buff *skb_in, int code)
129{ 129{
130 icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); 130 icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
131#ifdef CONFIG_IP_NF_TARGET_REJECT_SKERR
132 if (skb_in->sk) {
133 skb_in->sk->sk_err = icmp_err_convert[code].errno;
134 skb_in->sk->sk_error_report(skb_in->sk);
135 pr_debug("ipt_REJECT: sk_err=%d for skb=%p sk=%p\n",
136 skb_in->sk->sk_err, skb_in, skb_in->sk);
137 }
138#endif
131} 139}
132 140
133static unsigned int 141static unsigned int
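
The new SKERR branch maps the outgoing ICMP code to an errno through the kernel's icmp_err_convert[] table and signals it on the local socket. A partial userspace stand-in for that mapping (only the common unreachable codes are shown):

#include <errno.h>
#include <stdio.h>
#include <string.h>

/* ICMP_DEST_UNREACH codes (RFC 792). */
enum { NET_UNREACH, HOST_UNREACH, PROT_UNREACH, PORT_UNREACH, FRAG_NEEDED };

/* Partial stand-in for the kernel's icmp_err_convert[] table. */
static const int icmp_errno[] = {
        [NET_UNREACH]  = ENETUNREACH,
        [HOST_UNREACH] = EHOSTUNREACH,
        [PROT_UNREACH] = ENOPROTOOPT,
        [PORT_UNREACH] = ECONNREFUSED,
        [FRAG_NEEDED]  = EMSGSIZE,
};

int main(void)
{
        int code = PORT_UNREACH;

        printf("code %d -> errno %d (%s)\n",
               code, icmp_errno[code], strerror(icmp_errno[code]));
        return 0;
}
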
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index f3a9b42b16c..9bb1b8a37a2 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -82,7 +82,7 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
82#endif 82#endif
83#endif 83#endif
84 /* Gather fragments. */ 84 /* Gather fragments. */
85 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { 85 if (ip_is_fragment(ip_hdr(skb))) {
86 enum ip_defrag_users user = nf_ct_defrag_user(hooknum, skb); 86 enum ip_defrag_users user = nf_ct_defrag_user(hooknum, skb);
87 if (nf_ct_ipv4_gather_frags(skb, user)) 87 if (nf_ct_ipv4_gather_frags(skb, user))
88 return NF_STOLEN; 88 return NF_STOLEN;
diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c
index 3e61faf23a9..f52d41ea069 100644
--- a/net/ipv4/netfilter/nf_nat_proto_common.c
+++ b/net/ipv4/netfilter/nf_nat_proto_common.c
@@ -12,6 +12,7 @@
12#include <linux/ip.h> 12#include <linux/ip.h>
13 13
14#include <linux/netfilter.h> 14#include <linux/netfilter.h>
15#include <net/secure_seq.h>
15#include <net/netfilter/nf_nat.h> 16#include <net/netfilter/nf_nat.h>
16#include <net/netfilter/nf_nat_core.h> 17#include <net/netfilter/nf_nat_core.h>
17#include <net/netfilter/nf_nat_rule.h> 18#include <net/netfilter/nf_nat_rule.h>
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 8812a02078a..076b7c8c4aa 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -719,117 +719,115 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
719 719
720 l = 0; 720 l = 0;
721 switch (type) { 721 switch (type) {
722 case SNMP_INTEGER: 722 case SNMP_INTEGER:
723 len = sizeof(long); 723 len = sizeof(long);
724 if (!asn1_long_decode(ctx, end, &l)) { 724 if (!asn1_long_decode(ctx, end, &l)) {
725 kfree(id); 725 kfree(id);
726 return 0; 726 return 0;
727 } 727 }
728 *obj = kmalloc(sizeof(struct snmp_object) + len, 728 *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
729 GFP_ATOMIC); 729 if (*obj == NULL) {
730 if (*obj == NULL) { 730 kfree(id);
731 kfree(id); 731 if (net_ratelimit())
732 if (net_ratelimit()) 732 pr_notice("OOM in bsalg (%d)\n", __LINE__);
733 pr_notice("OOM in bsalg (%d)\n", __LINE__); 733 return 0;
734 return 0; 734 }
735 } 735 (*obj)->syntax.l[0] = l;
736 (*obj)->syntax.l[0] = l; 736 break;
737 break; 737 case SNMP_OCTETSTR:
738 case SNMP_OCTETSTR: 738 case SNMP_OPAQUE:
739 case SNMP_OPAQUE: 739 if (!asn1_octets_decode(ctx, end, &p, &len)) {
740 if (!asn1_octets_decode(ctx, end, &p, &len)) { 740 kfree(id);
741 kfree(id); 741 return 0;
742 return 0; 742 }
743 } 743 *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
744 *obj = kmalloc(sizeof(struct snmp_object) + len, 744 if (*obj == NULL) {
745 GFP_ATOMIC);
746 if (*obj == NULL) {
747 kfree(p);
748 kfree(id);
749 if (net_ratelimit())
750 pr_notice("OOM in bsalg (%d)\n", __LINE__);
751 return 0;
752 }
753 memcpy((*obj)->syntax.c, p, len);
754 kfree(p); 745 kfree(p);
755 break; 746 kfree(id);
756 case SNMP_NULL: 747 if (net_ratelimit())
757 case SNMP_NOSUCHOBJECT: 748 pr_notice("OOM in bsalg (%d)\n", __LINE__);
758 case SNMP_NOSUCHINSTANCE: 749 return 0;
759 case SNMP_ENDOFMIBVIEW: 750 }
760 len = 0; 751 memcpy((*obj)->syntax.c, p, len);
761 *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC); 752 kfree(p);
762 if (*obj == NULL) { 753 break;
763 kfree(id); 754 case SNMP_NULL:
764 if (net_ratelimit()) 755 case SNMP_NOSUCHOBJECT:
765 pr_notice("OOM in bsalg (%d)\n", __LINE__); 756 case SNMP_NOSUCHINSTANCE:
766 return 0; 757 case SNMP_ENDOFMIBVIEW:
767 } 758 len = 0;
768 if (!asn1_null_decode(ctx, end)) { 759 *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
769 kfree(id); 760 if (*obj == NULL) {
770 kfree(*obj); 761 kfree(id);
771 *obj = NULL; 762 if (net_ratelimit())
772 return 0; 763 pr_notice("OOM in bsalg (%d)\n", __LINE__);
773 } 764 return 0;
774 break; 765 }
775 case SNMP_OBJECTID: 766 if (!asn1_null_decode(ctx, end)) {
776 if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) { 767 kfree(id);
777 kfree(id); 768 kfree(*obj);
778 return 0; 769 *obj = NULL;
779 } 770 return 0;
780 len *= sizeof(unsigned long); 771 }
781 *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); 772 break;
782 if (*obj == NULL) { 773 case SNMP_OBJECTID:
783 kfree(lp); 774 if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) {
784 kfree(id); 775 kfree(id);
785 if (net_ratelimit()) 776 return 0;
786 pr_notice("OOM in bsalg (%d)\n", __LINE__); 777 }
787 return 0; 778 len *= sizeof(unsigned long);
788 } 779 *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
789 memcpy((*obj)->syntax.ul, lp, len); 780 if (*obj == NULL) {
790 kfree(lp); 781 kfree(lp);
791 break; 782 kfree(id);
792 case SNMP_IPADDR: 783 if (net_ratelimit())
793 if (!asn1_octets_decode(ctx, end, &p, &len)) { 784 pr_notice("OOM in bsalg (%d)\n", __LINE__);
794 kfree(id); 785 return 0;
795 return 0; 786 }
796 } 787 memcpy((*obj)->syntax.ul, lp, len);
797 if (len != 4) { 788 kfree(lp);
798 kfree(p); 789 break;
799 kfree(id); 790 case SNMP_IPADDR:
800 return 0; 791 if (!asn1_octets_decode(ctx, end, &p, &len)) {
801 } 792 kfree(id);
802 *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); 793 return 0;
803 if (*obj == NULL) { 794 }
804 kfree(p); 795 if (len != 4) {
805 kfree(id);
806 if (net_ratelimit())
807 pr_notice("OOM in bsalg (%d)\n", __LINE__);
808 return 0;
809 }
810 memcpy((*obj)->syntax.uc, p, len);
811 kfree(p); 796 kfree(p);
812 break;
813 case SNMP_COUNTER:
814 case SNMP_GAUGE:
815 case SNMP_TIMETICKS:
816 len = sizeof(unsigned long);
817 if (!asn1_ulong_decode(ctx, end, &ul)) {
818 kfree(id);
819 return 0;
820 }
821 *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
822 if (*obj == NULL) {
823 kfree(id);
824 if (net_ratelimit())
825 pr_notice("OOM in bsalg (%d)\n", __LINE__);
826 return 0;
827 }
828 (*obj)->syntax.ul[0] = ul;
829 break;
830 default:
831 kfree(id); 797 kfree(id);
832 return 0; 798 return 0;
799 }
800 *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
801 if (*obj == NULL) {
802 kfree(p);
803 kfree(id);
804 if (net_ratelimit())
805 pr_notice("OOM in bsalg (%d)\n", __LINE__);
806 return 0;
807 }
808 memcpy((*obj)->syntax.uc, p, len);
809 kfree(p);
810 break;
811 case SNMP_COUNTER:
812 case SNMP_GAUGE:
813 case SNMP_TIMETICKS:
814 len = sizeof(unsigned long);
815 if (!asn1_ulong_decode(ctx, end, &ul)) {
816 kfree(id);
817 return 0;
818 }
819 *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
820 if (*obj == NULL) {
821 kfree(id);
822 if (net_ratelimit())
823 pr_notice("OOM in bsalg (%d)\n", __LINE__);
824 return 0;
825 }
826 (*obj)->syntax.ul[0] = ul;
827 break;
828 default:
829 kfree(id);
830 return 0;
833 } 831 }
834 832
835 (*obj)->syntax_len = len; 833 (*obj)->syntax_len = len;
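
Every branch of the reindented switch ends in the same idiom: one kmalloc() of sizeof(struct snmp_object) + len, followed by a memcpy() into the trailing storage. A userspace sketch of this header-plus-payload allocation using a C99 flexible array member:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct blob {
        size_t        len;
        unsigned char data[];   /* payload allocated right after the header */
};

static struct blob *blob_new(const void *src, size_t len)
{
        struct blob *b = malloc(sizeof(*b) + len);  /* one allocation */

        if (!b)
                return NULL;
        b->len = len;
        memcpy(b->data, src, len);
        return b;
}

int main(void)
{
        struct blob *b = blob_new("\x7f\x00\x00\x01", 4);

        if (b) {
                printf("len=%zu first=%u\n", b->len, b->data[0]);
                free(b);
        }
        return 0;
}
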
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 483b76d042d..a6e606e8482 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -88,7 +88,7 @@ nf_nat_fn(unsigned int hooknum,
88 88
89 /* We never see fragments: conntrack defrags on pre-routing 89 /* We never see fragments: conntrack defrags on pre-routing
90 and local-out, and nf_nat_out protects post-routing. */ 90 and local-out, and nf_nat_out protects post-routing. */
91 NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))); 91 NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));
92 92
93 ct = nf_ct_get(skb, &ctinfo); 93 ct = nf_ct_get(skb, &ctinfo);
94 /* Can't track? It's not due to stress, or conntrack would 94 /* Can't track? It's not due to stress, or conntrack would
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index b14ec7d03b6..4bfad5da94f 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -254,6 +254,8 @@ static const struct snmp_mib snmp4_net_list[] = {
254 SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP), 254 SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP),
255 SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER), 255 SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER),
256 SNMP_MIB_ITEM("TCPTimeWaitOverflow", LINUX_MIB_TCPTIMEWAITOVERFLOW), 256 SNMP_MIB_ITEM("TCPTimeWaitOverflow", LINUX_MIB_TCPTIMEWAITOVERFLOW),
257 SNMP_MIB_ITEM("TCPReqQFullDoCookies", LINUX_MIB_TCPREQQFULLDOCOOKIES),
258 SNMP_MIB_ITEM("TCPReqQFullDrop", LINUX_MIB_TCPREQQFULLDROP),
257 SNMP_MIB_SENTINEL 259 SNMP_MIB_SENTINEL
258}; 260};
259 261
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index c9893d43242..61714bd5292 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -38,7 +38,7 @@
38 */ 38 */
39 39
40#include <linux/types.h> 40#include <linux/types.h>
41#include <asm/atomic.h> 41#include <linux/atomic.h>
42#include <asm/byteorder.h> 42#include <asm/byteorder.h>
43#include <asm/current.h> 43#include <asm/current.h>
44#include <asm/uaccess.h> 44#include <asm/uaccess.h>
@@ -563,7 +563,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
563 flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, 563 flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
564 RT_SCOPE_UNIVERSE, 564 RT_SCOPE_UNIVERSE,
565 inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, 565 inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
566 FLOWI_FLAG_CAN_SLEEP, daddr, saddr, 0, 0); 566 inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP,
567 daddr, saddr, 0, 0);
567 568
568 if (!inet->hdrincl) { 569 if (!inet->hdrincl) {
569 err = raw_probe_proto_opt(&fl4, msg); 570 err = raw_probe_proto_opt(&fl4, msg);
@@ -825,28 +826,28 @@ static int compat_raw_getsockopt(struct sock *sk, int level, int optname,
825static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg) 826static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg)
826{ 827{
827 switch (cmd) { 828 switch (cmd) {
828 case SIOCOUTQ: { 829 case SIOCOUTQ: {
829 int amount = sk_wmem_alloc_get(sk); 830 int amount = sk_wmem_alloc_get(sk);
830 831
831 return put_user(amount, (int __user *)arg); 832 return put_user(amount, (int __user *)arg);
832 } 833 }
833 case SIOCINQ: { 834 case SIOCINQ: {
834 struct sk_buff *skb; 835 struct sk_buff *skb;
835 int amount = 0; 836 int amount = 0;
836 837
837 spin_lock_bh(&sk->sk_receive_queue.lock); 838 spin_lock_bh(&sk->sk_receive_queue.lock);
838 skb = skb_peek(&sk->sk_receive_queue); 839 skb = skb_peek(&sk->sk_receive_queue);
839 if (skb != NULL) 840 if (skb != NULL)
840 amount = skb->len; 841 amount = skb->len;
841 spin_unlock_bh(&sk->sk_receive_queue.lock); 842 spin_unlock_bh(&sk->sk_receive_queue.lock);
842 return put_user(amount, (int __user *)arg); 843 return put_user(amount, (int __user *)arg);
843 } 844 }
844 845
845 default: 846 default:
846#ifdef CONFIG_IP_MROUTE 847#ifdef CONFIG_IP_MROUTE
847 return ipmr_ioctl(sk, cmd, (void __user *)arg); 848 return ipmr_ioctl(sk, cmd, (void __user *)arg);
848#else 849#else
849 return -ENOIOCTLCMD; 850 return -ENOIOCTLCMD;
850#endif 851#endif
851 } 852 }
852} 853}
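
raw_ioctl() above (only reindented here) services SIOCOUTQ and SIOCINQ: bytes not yet sent, and the length of the next queued datagram. The same ioctls are available from userspace; the demo below uses a UDP socket instead of a raw one so it runs without CAP_NET_RAW, and the short sleep is a simplification:

#include <arpa/inet.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <unistd.h>
#include <linux/sockios.h>   /* SIOCINQ */

int main(void)
{
        int rx = socket(AF_INET, SOCK_DGRAM, 0);
        int tx = socket(AF_INET, SOCK_DGRAM, 0);
        struct sockaddr_in a = { .sin_family = AF_INET };
        socklen_t alen = sizeof(a);
        int pending = 0;

        a.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
        bind(rx, (struct sockaddr *)&a, sizeof(a));       /* ephemeral port */
        getsockname(rx, (struct sockaddr *)&a, &alen);
        sendto(tx, "hello", 5, 0, (struct sockaddr *)&a, sizeof(a));

        usleep(10000);                   /* let the datagram land */
        ioctl(rx, SIOCINQ, &pending);    /* length of next datagram */
        printf("SIOCINQ: %d bytes pending\n", pending);   /* 5 */

        close(tx);
        close(rx);
        return 0;
}
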
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index aa13ef10511..b5638545deb 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -91,6 +91,7 @@
91#include <linux/rcupdate.h> 91#include <linux/rcupdate.h>
92#include <linux/times.h> 92#include <linux/times.h>
93#include <linux/slab.h> 93#include <linux/slab.h>
94#include <linux/prefetch.h>
94#include <net/dst.h> 95#include <net/dst.h>
95#include <net/net_namespace.h> 96#include <net/net_namespace.h>
96#include <net/protocol.h> 97#include <net/protocol.h>
@@ -108,9 +109,11 @@
108#ifdef CONFIG_SYSCTL 109#ifdef CONFIG_SYSCTL
109#include <linux/sysctl.h> 110#include <linux/sysctl.h>
110#endif 111#endif
112#include <net/atmclip.h>
113#include <net/secure_seq.h>
111 114
112#define RT_FL_TOS(oldflp4) \ 115#define RT_FL_TOS(oldflp4) \
113 ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) 116 ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
114 117
115#define IP_MAX_MTU 0xFFF0 118#define IP_MAX_MTU 0xFFF0
116 119
@@ -130,6 +133,10 @@ static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
130static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; 133static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
131static int ip_rt_min_advmss __read_mostly = 256; 134static int ip_rt_min_advmss __read_mostly = 256;
132static int rt_chain_length_max __read_mostly = 20; 135static int rt_chain_length_max __read_mostly = 20;
136static int redirect_genid;
137
138static struct delayed_work expires_work;
139static unsigned long expires_ljiffies;
133 140
134/* 141/*
135 * Interface to generic destination cache. 142 * Interface to generic destination cache.
@@ -184,6 +191,8 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
184 return p; 191 return p;
185} 192}
186 193
194static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr);
195
187static struct dst_ops ipv4_dst_ops = { 196static struct dst_ops ipv4_dst_ops = {
188 .family = AF_INET, 197 .family = AF_INET,
189 .protocol = cpu_to_be16(ETH_P_IP), 198 .protocol = cpu_to_be16(ETH_P_IP),
@@ -198,6 +207,7 @@ static struct dst_ops ipv4_dst_ops = {
198 .link_failure = ipv4_link_failure, 207 .link_failure = ipv4_link_failure,
199 .update_pmtu = ip_rt_update_pmtu, 208 .update_pmtu = ip_rt_update_pmtu,
200 .local_out = __ip_local_out, 209 .local_out = __ip_local_out,
210 .neigh_lookup = ipv4_neigh_lookup,
201}; 211};
202 212
203#define ECN_OR_COST(class) TC_PRIO_##class 213#define ECN_OR_COST(class) TC_PRIO_##class
@@ -411,7 +421,13 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
411 "HHUptod\tSpecDst"); 421 "HHUptod\tSpecDst");
412 else { 422 else {
413 struct rtable *r = v; 423 struct rtable *r = v;
414 int len; 424 struct neighbour *n;
425 int len, HHUptod;
426
427 rcu_read_lock();
428 n = dst_get_neighbour(&r->dst);
429 HHUptod = (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0;
430 rcu_read_unlock();
415 431
416 seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" 432 seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t"
417 "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", 433 "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n",
@@ -425,9 +441,8 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
425 (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + 441 (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
426 dst_metric(&r->dst, RTAX_RTTVAR)), 442 dst_metric(&r->dst, RTAX_RTTVAR)),
427 r->rt_key_tos, 443 r->rt_key_tos,
428 r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1, 444 -1,
429 r->dst.hh ? (r->dst.hh->hh_output == 445 HHUptod,
430 dev_queue_xmit) : 0,
431 r->rt_spec_dst, &len); 446 r->rt_spec_dst, &len);
432 447
433 seq_printf(seq, "%*s\n", 127 - len, ""); 448 seq_printf(seq, "%*s\n", 127 - len, "");
@@ -716,7 +731,7 @@ static inline bool compare_hash_inputs(const struct rtable *rt1,
716{ 731{
717 return ((((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) | 732 return ((((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
718 ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | 733 ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
719 (rt1->rt_iif ^ rt2->rt_iif)) == 0); 734 (rt1->rt_route_iif ^ rt2->rt_route_iif)) == 0);
720} 735}
721 736
722static inline int compare_keys(struct rtable *rt1, struct rtable *rt2) 737static inline int compare_keys(struct rtable *rt1, struct rtable *rt2)
@@ -725,8 +740,8 @@ static inline int compare_keys(struct rtable *rt1, struct rtable *rt2)
725 ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | 740 ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
726 (rt1->rt_mark ^ rt2->rt_mark) | 741 (rt1->rt_mark ^ rt2->rt_mark) |
727 (rt1->rt_key_tos ^ rt2->rt_key_tos) | 742 (rt1->rt_key_tos ^ rt2->rt_key_tos) |
728 (rt1->rt_oif ^ rt2->rt_oif) | 743 (rt1->rt_route_iif ^ rt2->rt_route_iif) |
729 (rt1->rt_iif ^ rt2->rt_iif)) == 0; 744 (rt1->rt_oif ^ rt2->rt_oif)) == 0;
730} 745}
731 746
732static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) 747static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
@@ -820,6 +835,97 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
820 return ONE; 835 return ONE;
821} 836}
822 837
838static void rt_check_expire(void)
839{
840 static unsigned int rover;
841 unsigned int i = rover, goal;
842 struct rtable *rth;
843 struct rtable __rcu **rthp;
844 unsigned long samples = 0;
845 unsigned long sum = 0, sum2 = 0;
846 unsigned long delta;
847 u64 mult;
848
849 delta = jiffies - expires_ljiffies;
850 expires_ljiffies = jiffies;
851 mult = ((u64)delta) << rt_hash_log;
852 if (ip_rt_gc_timeout > 1)
853 do_div(mult, ip_rt_gc_timeout);
854 goal = (unsigned int)mult;
855 if (goal > rt_hash_mask)
856 goal = rt_hash_mask + 1;
857 for (; goal > 0; goal--) {
858 unsigned long tmo = ip_rt_gc_timeout;
859 unsigned long length;
860
861 i = (i + 1) & rt_hash_mask;
862 rthp = &rt_hash_table[i].chain;
863
864 if (need_resched())
865 cond_resched();
866
867 samples++;
868
869 if (rcu_dereference_raw(*rthp) == NULL)
870 continue;
871 length = 0;
872 spin_lock_bh(rt_hash_lock_addr(i));
873 while ((rth = rcu_dereference_protected(*rthp,
874 lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) {
875 prefetch(rth->dst.rt_next);
876 if (rt_is_expired(rth)) {
877 *rthp = rth->dst.rt_next;
878 rt_free(rth);
879 continue;
880 }
881 if (rth->dst.expires) {
882 /* Entry is expired even if it is in use */
883 if (time_before_eq(jiffies, rth->dst.expires)) {
884nofree:
885 tmo >>= 1;
886 rthp = &rth->dst.rt_next;
887 /*
888 * We only count entries on
889 * a chain with equal hash inputs once
890 * so that entries for different QOS
891 * levels, and other non-hash input
892 * attributes don't unfairly skew
893 * the length computation
894 */
895 length += has_noalias(rt_hash_table[i].chain, rth);
896 continue;
897 }
898 } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
899 goto nofree;
900
901 /* Cleanup aged off entries. */
902 *rthp = rth->dst.rt_next;
903 rt_free(rth);
904 }
905 spin_unlock_bh(rt_hash_lock_addr(i));
906 sum += length;
907 sum2 += length*length;
908 }
909 if (samples) {
910 unsigned long avg = sum / samples;
911 unsigned long sd = int_sqrt(sum2 / samples - avg*avg);
912 rt_chain_length_max = max_t(unsigned long,
913 ip_rt_gc_elasticity,
914 (avg + 4*sd) >> FRACT_BITS);
915 }
916 rover = i;
917}
918
919/*
920 * rt_worker_func() is run in process context.
921 * we call rt_check_expire() to scan part of the hash table
922 */
923static void rt_worker_func(struct work_struct *work)
924{
925 rt_check_expire();
926 schedule_delayed_work(&expires_work, ip_rt_gc_interval);
927}
928
823/* 929/*
824 * Perturbation of rt_genid by a small quantity [1..256] 930 * Perturbation of rt_genid by a small quantity [1..256]
825 * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() 931 * Using 8 bits of shuffling ensure we can call rt_cache_invalidate()
@@ -832,6 +938,7 @@ static void rt_cache_invalidate(struct net *net)
832 938
833 get_random_bytes(&shuffle, sizeof(shuffle)); 939 get_random_bytes(&shuffle, sizeof(shuffle));
834 atomic_add(shuffle + 1U, &net->ipv4.rt_genid); 940 atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
941 redirect_genid++;
835} 942}
836 943
837/* 944/*
@@ -1006,6 +1113,37 @@ static int slow_chain_length(const struct rtable *head)
1006 return length >> FRACT_BITS; 1113 return length >> FRACT_BITS;
1007} 1114}
1008 1115
1116static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr)
1117{
1118 struct neigh_table *tbl = &arp_tbl;
1119 static const __be32 inaddr_any = 0;
1120 struct net_device *dev = dst->dev;
1121 const __be32 *pkey = daddr;
1122 struct neighbour *n;
1123
1124#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
1125 if (dev->type == ARPHRD_ATM)
1126 tbl = clip_tbl_hook;
1127#endif
1128 if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
1129 pkey = &inaddr_any;
1130
1131 n = __ipv4_neigh_lookup(tbl, dev, *(__force u32 *)pkey);
1132 if (n)
1133 return n;
1134 return neigh_create(tbl, pkey, dev);
1135}
1136
1137static int rt_bind_neighbour(struct rtable *rt)
1138{
1139 struct neighbour *n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway);
1140 if (IS_ERR(n))
1141 return PTR_ERR(n);
1142 dst_set_neighbour(&rt->dst, n);
1143
1144 return 0;
1145}
1146
1009static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt, 1147static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt,
1010 struct sk_buff *skb, int ifindex) 1148 struct sk_buff *skb, int ifindex)
1011{ 1149{
@@ -1042,7 +1180,7 @@ restart:
1042 1180
1043 rt->dst.flags |= DST_NOCACHE; 1181 rt->dst.flags |= DST_NOCACHE;
1044 if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) { 1182 if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) {
1045 int err = arp_bind_neighbour(&rt->dst); 1183 int err = rt_bind_neighbour(rt);
1046 if (err) { 1184 if (err) {
1047 if (net_ratelimit()) 1185 if (net_ratelimit())
1048 printk(KERN_WARNING 1186 printk(KERN_WARNING
@@ -1138,7 +1276,7 @@ restart:
1138 route or unicast forwarding path. 1276 route or unicast forwarding path.
1139 */ 1277 */
1140 if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) { 1278 if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) {
1141 int err = arp_bind_neighbour(&rt->dst); 1279 int err = rt_bind_neighbour(rt);
1142 if (err) { 1280 if (err) {
1143 spin_unlock_bh(rt_hash_lock_addr(hash)); 1281 spin_unlock_bh(rt_hash_lock_addr(hash));
1144 1282
@@ -1229,7 +1367,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
1229{ 1367{
1230 struct rtable *rt = (struct rtable *) dst; 1368 struct rtable *rt = (struct rtable *) dst;
1231 1369
1232 if (rt) { 1370 if (rt && !(rt->dst.flags & DST_NOPEER)) {
1233 if (rt->peer == NULL) 1371 if (rt->peer == NULL)
1234 rt_bind_peer(rt, rt->rt_dst, 1); 1372 rt_bind_peer(rt, rt->rt_dst, 1);
1235 1373
@@ -1240,7 +1378,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
1240 iph->id = htons(inet_getid(rt->peer, more)); 1378 iph->id = htons(inet_getid(rt->peer, more));
1241 return; 1379 return;
1242 } 1380 }
1243 } else 1381 } else if (!rt)
1244 printk(KERN_DEBUG "rt_bind_peer(0) @%p\n", 1382 printk(KERN_DEBUG "rt_bind_peer(0) @%p\n",
1245 __builtin_return_address(0)); 1383 __builtin_return_address(0));
1246 1384
@@ -1268,11 +1406,40 @@ static void rt_del(unsigned hash, struct rtable *rt)
1268 spin_unlock_bh(rt_hash_lock_addr(hash)); 1406 spin_unlock_bh(rt_hash_lock_addr(hash));
1269} 1407}
1270 1408
1409static void check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
1410{
1411 struct rtable *rt = (struct rtable *) dst;
1412 __be32 orig_gw = rt->rt_gateway;
1413 struct neighbour *n, *old_n;
1414
1415 dst_confirm(&rt->dst);
1416
1417 rt->rt_gateway = peer->redirect_learned.a4;
1418
1419 n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway);
1420 if (IS_ERR(n)) {
1421 rt->rt_gateway = orig_gw;
1422 return;
1423 }
1424 old_n = xchg(&rt->dst._neighbour, n);
1425 if (old_n)
1426 neigh_release(old_n);
1427 if (!(n->nud_state & NUD_VALID)) {
1428 neigh_event_send(n, NULL);
1429 } else {
1430 rt->rt_flags |= RTCF_REDIRECTED;
1431 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
1432 }
1433}
1434
1271/* called in rcu_read_lock() section */ 1435/* called in rcu_read_lock() section */
1272void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, 1436void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1273 __be32 saddr, struct net_device *dev) 1437 __be32 saddr, struct net_device *dev)
1274{ 1438{
1439 int s, i;
 1275 struct in_device *in_dev = __in_dev_get_rcu(dev); 1440 struct in_device *in_dev = __in_dev_get_rcu(dev);
 1441 __be32 skeys[2] = { saddr, 0 };
 1442 int ikeys[2] = { dev->ifindex, 0 };
1442 int ikeys[2] = { dev->ifindex, 0 };
1276 struct inet_peer *peer; 1443 struct inet_peer *peer;
1277 struct net *net; 1444 struct net *net;
1278 1445
@@ -1295,13 +1462,45 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1295 goto reject_redirect; 1462 goto reject_redirect;
1296 } 1463 }
1297 1464
1298 peer = inet_getpeer_v4(daddr, 1); 1465 for (s = 0; s < 2; s++) {
1299 if (peer) { 1466 for (i = 0; i < 2; i++) {
1300 peer->redirect_learned.a4 = new_gw; 1467 unsigned int hash;
1468 struct rtable __rcu **rthp;
1469 struct rtable *rt;
1301 1470
1302 inet_putpeer(peer); 1471 hash = rt_hash(daddr, skeys[s], ikeys[i], rt_genid(net));
1472
1473 rthp = &rt_hash_table[hash].chain;
1303 1474
1304 atomic_inc(&__rt_peer_genid); 1475 while ((rt = rcu_dereference(*rthp)) != NULL) {
1476 rthp = &rt->dst.rt_next;
1477
1478 if (rt->rt_key_dst != daddr ||
1479 rt->rt_key_src != skeys[s] ||
1480 rt->rt_oif != ikeys[i] ||
1481 rt_is_input_route(rt) ||
1482 rt_is_expired(rt) ||
1483 !net_eq(dev_net(rt->dst.dev), net) ||
1484 rt->dst.error ||
1485 rt->dst.dev != dev ||
1486 rt->rt_gateway != old_gw)
1487 continue;
1488
1489 if (!rt->peer)
1490 rt_bind_peer(rt, rt->rt_dst, 1);
1491
1492 peer = rt->peer;
1493 if (peer) {
1494 if (peer->redirect_learned.a4 != new_gw ||
1495 peer->redirect_genid != redirect_genid) {
1496 peer->redirect_learned.a4 = new_gw;
1497 peer->redirect_genid = redirect_genid;
1498 atomic_inc(&__rt_peer_genid);
1499 }
1500 check_peer_redir(&rt->dst, peer);
1501 }
1502 }
1503 }
1305 } 1504 }
1306 return; 1505 return;
1307 1506
@@ -1439,20 +1638,20 @@ static int ip_error(struct sk_buff *skb)
1439 int code; 1638 int code;
1440 1639
1441 switch (rt->dst.error) { 1640 switch (rt->dst.error) {
1442 case EINVAL: 1641 case EINVAL:
1443 default: 1642 default:
1444 goto out; 1643 goto out;
1445 case EHOSTUNREACH: 1644 case EHOSTUNREACH:
1446 code = ICMP_HOST_UNREACH; 1645 code = ICMP_HOST_UNREACH;
1447 break; 1646 break;
1448 case ENETUNREACH: 1647 case ENETUNREACH:
1449 code = ICMP_NET_UNREACH; 1648 code = ICMP_NET_UNREACH;
1450 IP_INC_STATS_BH(dev_net(rt->dst.dev), 1649 IP_INC_STATS_BH(dev_net(rt->dst.dev),
1451 IPSTATS_MIB_INNOROUTES); 1650 IPSTATS_MIB_INNOROUTES);
1452 break; 1651 break;
1453 case EACCES: 1652 case EACCES:
1454 code = ICMP_PKT_FILTERED; 1653 code = ICMP_PKT_FILTERED;
1455 break; 1654 break;
1456 } 1655 }
1457 1656
1458 if (!rt->peer) 1657 if (!rt->peer)
@@ -1531,11 +1730,10 @@ unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph,
1531 est_mtu = mtu; 1730 est_mtu = mtu;
1532 peer->pmtu_learned = mtu; 1731 peer->pmtu_learned = mtu;
1533 peer->pmtu_expires = pmtu_expires; 1732 peer->pmtu_expires = pmtu_expires;
1733 atomic_inc(&__rt_peer_genid);
1534 } 1734 }
1535 1735
1536 inet_putpeer(peer); 1736 inet_putpeer(peer);
1537
1538 atomic_inc(&__rt_peer_genid);
1539 } 1737 }
1540 return est_mtu ? : new_mtu; 1738 return est_mtu ? : new_mtu;
1541} 1739}
@@ -1588,37 +1786,9 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1588 } 1786 }
1589} 1787}
1590 1788
1591static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
1592{
1593 struct rtable *rt = (struct rtable *) dst;
1594 __be32 orig_gw = rt->rt_gateway;
1595
1596 dst_confirm(&rt->dst);
1597
1598 neigh_release(rt->dst.neighbour);
1599 rt->dst.neighbour = NULL;
1600
1601 rt->rt_gateway = peer->redirect_learned.a4;
1602 if (arp_bind_neighbour(&rt->dst) ||
1603 !(rt->dst.neighbour->nud_state & NUD_VALID)) {
1604 if (rt->dst.neighbour)
1605 neigh_event_send(rt->dst.neighbour, NULL);
1606 rt->rt_gateway = orig_gw;
1607 return -EAGAIN;
1608 } else {
1609 rt->rt_flags |= RTCF_REDIRECTED;
1610 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE,
1611 rt->dst.neighbour);
1612 }
1613 return 0;
1614}
1615 1789
1616static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) 1790static void ipv4_validate_peer(struct rtable *rt)
1617{ 1791{
1618 struct rtable *rt = (struct rtable *) dst;
1619
1620 if (rt_is_expired(rt))
1621 return NULL;
1622 if (rt->rt_peer_genid != rt_peer_genid()) { 1792 if (rt->rt_peer_genid != rt_peer_genid()) {
1623 struct inet_peer *peer; 1793 struct inet_peer *peer;
1624 1794
@@ -1627,17 +1797,26 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1627 1797
1628 peer = rt->peer; 1798 peer = rt->peer;
1629 if (peer) { 1799 if (peer) {
1630 check_peer_pmtu(dst, peer); 1800 check_peer_pmtu(&rt->dst, peer);
1631 1801
1802 if (peer->redirect_genid != redirect_genid)
1803 peer->redirect_learned.a4 = 0;
1632 if (peer->redirect_learned.a4 && 1804 if (peer->redirect_learned.a4 &&
1633 peer->redirect_learned.a4 != rt->rt_gateway) { 1805 peer->redirect_learned.a4 != rt->rt_gateway)
1634 if (check_peer_redir(dst, peer)) 1806 check_peer_redir(&rt->dst, peer);
1635 return NULL;
1636 }
1637 } 1807 }
1638 1808
1639 rt->rt_peer_genid = rt_peer_genid(); 1809 rt->rt_peer_genid = rt_peer_genid();
1640 } 1810 }
1811}
1812
1813static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1814{
1815 struct rtable *rt = (struct rtable *) dst;
1816
1817 if (rt_is_expired(rt))
1818 return NULL;
1819 ipv4_validate_peer(rt);
1641 return dst; 1820 return dst;
1642} 1821}
1643 1822
@@ -1703,7 +1882,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
1703 memset(&fl4, 0, sizeof(fl4)); 1882 memset(&fl4, 0, sizeof(fl4));
1704 fl4.daddr = iph->daddr; 1883 fl4.daddr = iph->daddr;
1705 fl4.saddr = iph->saddr; 1884 fl4.saddr = iph->saddr;
1706 fl4.flowi4_tos = iph->tos; 1885 fl4.flowi4_tos = RT_TOS(iph->tos);
1707 fl4.flowi4_oif = rt->dst.dev->ifindex; 1886 fl4.flowi4_oif = rt->dst.dev->ifindex;
1708 fl4.flowi4_iif = skb->dev->ifindex; 1887 fl4.flowi4_iif = skb->dev->ifindex;
1709 fl4.flowi4_mark = skb->mark; 1888 fl4.flowi4_mark = skb->mark;
@@ -1780,6 +1959,8 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
1780 dst_init_metrics(&rt->dst, peer->metrics, false); 1959 dst_init_metrics(&rt->dst, peer->metrics, false);
1781 1960
1782 check_peer_pmtu(&rt->dst, peer); 1961 check_peer_pmtu(&rt->dst, peer);
1962 if (peer->redirect_genid != redirect_genid)
1963 peer->redirect_learned.a4 = 0;
1783 if (peer->redirect_learned.a4 && 1964 if (peer->redirect_learned.a4 &&
1784 peer->redirect_learned.a4 != rt->rt_gateway) { 1965 peer->redirect_learned.a4 != rt->rt_gateway) {
1785 rt->rt_gateway = peer->redirect_learned.a4; 1966 rt->rt_gateway = peer->redirect_learned.a4;
@@ -2280,12 +2461,12 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2280 rth = rcu_dereference(rth->dst.rt_next)) { 2461 rth = rcu_dereference(rth->dst.rt_next)) {
2281 if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) | 2462 if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) |
2282 ((__force u32)rth->rt_key_src ^ (__force u32)saddr) | 2463 ((__force u32)rth->rt_key_src ^ (__force u32)saddr) |
2283 (rth->rt_iif ^ iif) | 2464 (rth->rt_route_iif ^ iif) |
2284 rth->rt_oif |
2285 (rth->rt_key_tos ^ tos)) == 0 && 2465 (rth->rt_key_tos ^ tos)) == 0 &&
2286 rth->rt_mark == skb->mark && 2466 rth->rt_mark == skb->mark &&
2287 net_eq(dev_net(rth->dst.dev), net) && 2467 net_eq(dev_net(rth->dst.dev), net) &&
2288 !rt_is_expired(rth)) { 2468 !rt_is_expired(rth)) {
2469 ipv4_validate_peer(rth);
2289 if (noref) { 2470 if (noref) {
2290 dst_use_noref(&rth->dst, jiffies); 2471 dst_use_noref(&rth->dst, jiffies);
2291 skb_dst_set_noref(skb, &rth->dst); 2472 skb_dst_set_noref(skb, &rth->dst);
@@ -2344,11 +2525,11 @@ EXPORT_SYMBOL(ip_route_input_common);
2344static struct rtable *__mkroute_output(const struct fib_result *res, 2525static struct rtable *__mkroute_output(const struct fib_result *res,
2345 const struct flowi4 *fl4, 2526 const struct flowi4 *fl4,
2346 __be32 orig_daddr, __be32 orig_saddr, 2527 __be32 orig_daddr, __be32 orig_saddr,
2347 int orig_oif, struct net_device *dev_out, 2528 int orig_oif, __u8 orig_rtos,
2529 struct net_device *dev_out,
2348 unsigned int flags) 2530 unsigned int flags)
2349{ 2531{
2350 struct fib_info *fi = res->fi; 2532 struct fib_info *fi = res->fi;
2351 u32 tos = RT_FL_TOS(fl4);
2352 struct in_device *in_dev; 2533 struct in_device *in_dev;
2353 u16 type = res->type; 2534 u16 type = res->type;
2354 struct rtable *rth; 2535 struct rtable *rth;
@@ -2399,7 +2580,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
2399 rth->rt_genid = rt_genid(dev_net(dev_out)); 2580 rth->rt_genid = rt_genid(dev_net(dev_out));
2400 rth->rt_flags = flags; 2581 rth->rt_flags = flags;
2401 rth->rt_type = type; 2582 rth->rt_type = type;
2402 rth->rt_key_tos = tos; 2583 rth->rt_key_tos = orig_rtos;
2403 rth->rt_dst = fl4->daddr; 2584 rth->rt_dst = fl4->daddr;
2404 rth->rt_src = fl4->saddr; 2585 rth->rt_src = fl4->saddr;
2405 rth->rt_route_iif = 0; 2586 rth->rt_route_iif = 0;
@@ -2449,7 +2630,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
2449static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) 2630static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4)
2450{ 2631{
2451 struct net_device *dev_out = NULL; 2632 struct net_device *dev_out = NULL;
2452 u32 tos = RT_FL_TOS(fl4); 2633 __u8 tos = RT_FL_TOS(fl4);
2453 unsigned int flags = 0; 2634 unsigned int flags = 0;
2454 struct fib_result res; 2635 struct fib_result res;
2455 struct rtable *rth; 2636 struct rtable *rth;
@@ -2625,7 +2806,7 @@ static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4)
2625 2806
2626make_route: 2807make_route:
2627 rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif, 2808 rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif,
2628 dev_out, flags); 2809 tos, dev_out, flags);
2629 if (!IS_ERR(rth)) { 2810 if (!IS_ERR(rth)) {
2630 unsigned int hash; 2811 unsigned int hash;
2631 2812
@@ -2661,6 +2842,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4)
2661 (IPTOS_RT_MASK | RTO_ONLINK)) && 2842 (IPTOS_RT_MASK | RTO_ONLINK)) &&
2662 net_eq(dev_net(rth->dst.dev), net) && 2843 net_eq(dev_net(rth->dst.dev), net) &&
2663 !rt_is_expired(rth)) { 2844 !rt_is_expired(rth)) {
2845 ipv4_validate_peer(rth);
2664 dst_use(&rth->dst, jiffies); 2846 dst_use(&rth->dst, jiffies);
2665 RT_CACHE_STAT_INC(out_hit); 2847 RT_CACHE_STAT_INC(out_hit);
2666 rcu_read_unlock_bh(); 2848 rcu_read_unlock_bh();
@@ -2708,6 +2890,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
2708 .default_advmss = ipv4_default_advmss, 2890 .default_advmss = ipv4_default_advmss,
2709 .update_pmtu = ipv4_rt_blackhole_update_pmtu, 2891 .update_pmtu = ipv4_rt_blackhole_update_pmtu,
2710 .cow_metrics = ipv4_rt_blackhole_cow_metrics, 2892 .cow_metrics = ipv4_rt_blackhole_cow_metrics,
2893 .neigh_lookup = ipv4_neigh_lookup,
2711}; 2894};
2712 2895
2713struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) 2896struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
@@ -3088,6 +3271,13 @@ static ctl_table ipv4_route_table[] = {
3088 .proc_handler = proc_dointvec_jiffies, 3271 .proc_handler = proc_dointvec_jiffies,
3089 }, 3272 },
3090 { 3273 {
3274 .procname = "gc_interval",
3275 .data = &ip_rt_gc_interval,
3276 .maxlen = sizeof(int),
3277 .mode = 0644,
3278 .proc_handler = proc_dointvec_jiffies,
3279 },
3280 {
3091 .procname = "redirect_load", 3281 .procname = "redirect_load",
3092 .data = &ip_rt_redirect_load, 3282 .data = &ip_rt_redirect_load,
3093 .maxlen = sizeof(int), 3283 .maxlen = sizeof(int),
@@ -3297,13 +3487,18 @@ int __init ip_rt_init(void)
3297 devinet_init(); 3487 devinet_init();
3298 ip_fib_init(); 3488 ip_fib_init();
3299 3489
3490 INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func);
3491 expires_ljiffies = jiffies;
3492 schedule_delayed_work(&expires_work,
3493 net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
3494
3300 if (ip_rt_proc_init()) 3495 if (ip_rt_proc_init())
3301 printk(KERN_ERR "Unable to create route proc files\n"); 3496 printk(KERN_ERR "Unable to create route proc files\n");
3302#ifdef CONFIG_XFRM 3497#ifdef CONFIG_XFRM
3303 xfrm_init(); 3498 xfrm_init();
3304 xfrm4_init(ip_rt_max_size); 3499 xfrm4_init(ip_rt_max_size);
3305#endif 3500#endif
3306 rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL); 3501 rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);
3307 3502
3308#ifdef CONFIG_SYSCTL 3503#ifdef CONFIG_SYSCTL
3309 register_pernet_subsys(&sysctl_route_ops); 3504 register_pernet_subsys(&sysctl_route_ops);
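
rt_check_expire(), reinstated by this patch, re-derives rt_chain_length_max from the mean and standard deviation of the sampled chain lengths: max(ip_rt_gc_elasticity, (avg + 4*sd) >> FRACT_BITS), where lengths are accumulated in fixed-point 1/8 units (FRACT_BITS is 3 in this file). A worked userspace version of that arithmetic, integer square root included:

#include <stdio.h>

#define FRACT_BITS 3                   /* fixed-point shift used in route.c */

static unsigned long int_sqrt(unsigned long x)   /* floor(sqrt(x)) */
{
        unsigned long r = 0;

        while ((r + 1) * (r + 1) <= x)
                r++;
        return r;
}

int main(void)
{
        /* chain lengths sampled per bucket, pre-scaled by 2^FRACT_BITS */
        unsigned long lengths[] = { 8, 16, 8, 24, 8, 16 };
        unsigned long samples = 6, sum = 0, sum2 = 0, elasticity = 8;

        for (unsigned long i = 0; i < samples; i++) {
                sum  += lengths[i];
                sum2 += lengths[i] * lengths[i];
        }

        unsigned long avg = sum / samples;
        unsigned long sd  = int_sqrt(sum2 / samples - avg * avg);
        unsigned long max = (avg + 4 * sd) >> FRACT_BITS;

        if (max < elasticity)
                max = elasticity;
        printf("avg=%lu sd=%lu rt_chain_length_max=%lu\n", avg, sd, max);
        return 0;
}
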
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 26461492a84..3bc5c8f7c71 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -276,7 +276,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
276 int mss; 276 int mss;
277 struct rtable *rt; 277 struct rtable *rt;
278 __u8 rcv_wscale; 278 __u8 rcv_wscale;
279 bool ecn_ok; 279 bool ecn_ok = false;
280 280
281 if (!sysctl_tcp_syncookies || !th->ack || th->rst) 281 if (!sysctl_tcp_syncookies || !th->ack || th->rst)
282 goto out; 282 goto out;
@@ -316,6 +316,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
316 ireq->wscale_ok = tcp_opt.wscale_ok; 316 ireq->wscale_ok = tcp_opt.wscale_ok;
317 ireq->tstamp_ok = tcp_opt.saw_tstamp; 317 ireq->tstamp_ok = tcp_opt.saw_tstamp;
318 req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; 318 req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
319 treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
319 320
 320 /* We throwed the options of the initial SYN away, so we hope 321 /* We throwed the options of the initial SYN away, so we hope
 321 * the ACK carries the same options again (see RFC1122 4.2.3.8) 322 * the ACK carries the same options again (see RFC1122 4.2.3.8)
321 * the ACK carries the same options again (see RFC1122 4.2.3.8) 322 * the ACK carries the same options again (see RFC1122 4.2.3.8)
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 57d0752e239..69fd7201129 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -398,20 +398,6 @@ static struct ctl_table ipv4_table[] = {
398 .proc_handler = proc_dointvec_jiffies, 398 .proc_handler = proc_dointvec_jiffies,
399 }, 399 },
400 { 400 {
401 .procname = "inet_peer_gc_mintime",
402 .data = &inet_peer_gc_mintime,
403 .maxlen = sizeof(int),
404 .mode = 0644,
405 .proc_handler = proc_dointvec_jiffies,
406 },
407 {
408 .procname = "inet_peer_gc_maxtime",
409 .data = &inet_peer_gc_maxtime,
410 .maxlen = sizeof(int),
411 .mode = 0644,
412 .proc_handler = proc_dointvec_jiffies,
413 },
414 {
415 .procname = "tcp_orphan_retries", 401 .procname = "tcp_orphan_retries",
416 .data = &sysctl_tcp_orphan_retries, 402 .data = &sysctl_tcp_orphan_retries,
417 .maxlen = sizeof(int), 403 .maxlen = sizeof(int),
diff --git a/net/ipv4/sysfs_net_ipv4.c b/net/ipv4/sysfs_net_ipv4.c
new file mode 100644
index 00000000000..0cbbf10026a
--- /dev/null
+++ b/net/ipv4/sysfs_net_ipv4.c
@@ -0,0 +1,88 @@
1/*
2 * net/ipv4/sysfs_net_ipv4.c
3 *
4 * sysfs-based networking knobs (so we can, unlike with sysctl, control perms)
5 *
6 * Copyright (C) 2008 Google, Inc.
7 *
8 * Robert Love <rlove@google.com>
9 *
10 * This software is licensed under the terms of the GNU General Public
11 * License version 2, as published by the Free Software Foundation, and
12 * may be copied, distributed, and modified under those terms.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 */
19
20#include <linux/kobject.h>
21#include <linux/string.h>
22#include <linux/sysfs.h>
23#include <linux/init.h>
24#include <net/tcp.h>
25
26#define CREATE_IPV4_FILE(_name, _var) \
27static ssize_t _name##_show(struct kobject *kobj, \
28 struct kobj_attribute *attr, char *buf) \
29{ \
30 return sprintf(buf, "%d\n", _var); \
31} \
32static ssize_t _name##_store(struct kobject *kobj, \
33 struct kobj_attribute *attr, \
34 const char *buf, size_t count) \
35{ \
36 int val, ret; \
37 ret = sscanf(buf, "%d", &val); \
38 if (ret != 1) \
39 return -EINVAL; \
40 if (val < 0) \
41 return -EINVAL; \
42 _var = val; \
43 return count; \
44} \
45static struct kobj_attribute _name##_attr = \
46 __ATTR(_name, 0644, _name##_show, _name##_store)
47
48CREATE_IPV4_FILE(tcp_wmem_min, sysctl_tcp_wmem[0]);
49CREATE_IPV4_FILE(tcp_wmem_def, sysctl_tcp_wmem[1]);
50CREATE_IPV4_FILE(tcp_wmem_max, sysctl_tcp_wmem[2]);
51
52CREATE_IPV4_FILE(tcp_rmem_min, sysctl_tcp_rmem[0]);
53CREATE_IPV4_FILE(tcp_rmem_def, sysctl_tcp_rmem[1]);
54CREATE_IPV4_FILE(tcp_rmem_max, sysctl_tcp_rmem[2]);
55
56static struct attribute *ipv4_attrs[] = {
57 &tcp_wmem_min_attr.attr,
58 &tcp_wmem_def_attr.attr,
59 &tcp_wmem_max_attr.attr,
60 &tcp_rmem_min_attr.attr,
61 &tcp_rmem_def_attr.attr,
62 &tcp_rmem_max_attr.attr,
63 NULL
64};
65
66static struct attribute_group ipv4_attr_group = {
67 .attrs = ipv4_attrs,
68};
69
70static __init int sysfs_ipv4_init(void)
71{
72 struct kobject *ipv4_kobject;
73 int ret;
74
75 ipv4_kobject = kobject_create_and_add("ipv4", kernel_kobj);
76 if (!ipv4_kobject)
77 return -ENOMEM;
78
79 ret = sysfs_create_group(ipv4_kobject, &ipv4_attr_group);
80 if (ret) {
81 kobject_put(ipv4_kobject);
82 return ret;
83 }
84
85 return 0;
86}
87
88subsys_initcall(sysfs_ipv4_init);
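Since kernel_kobj is the kobject behind /sys/kernel, these knobs surface as /sys/kernel/ipv4/tcp_wmem_min and friends. For clarity, CREATE_IPV4_FILE(tcp_wmem_min, sysctl_tcp_wmem[0]) expands to roughly the following (layout mine, the two -EINVAL checks folded into one):

static ssize_t tcp_wmem_min_show(struct kobject *kobj,
				 struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", sysctl_tcp_wmem[0]);
}

static ssize_t tcp_wmem_min_store(struct kobject *kobj,
				  struct kobj_attribute *attr,
				  const char *buf, size_t count)
{
	int val, ret;

	ret = sscanf(buf, "%d", &val);
	if (ret != 1 || val < 0)
		return -EINVAL;
	sysctl_tcp_wmem[0] = val;
	return count;
}

static struct kobj_attribute tcp_wmem_min_attr =
	__ATTR(tcp_wmem_min, 0644, tcp_wmem_min_show, tcp_wmem_min_store);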
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 46febcacb72..09ced58e6a5 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -266,11 +266,15 @@
266#include <linux/crypto.h> 266#include <linux/crypto.h>
267#include <linux/time.h> 267#include <linux/time.h>
268#include <linux/slab.h> 268#include <linux/slab.h>
269#include <linux/uid_stat.h>
269 270
270#include <net/icmp.h> 271#include <net/icmp.h>
271#include <net/tcp.h> 272#include <net/tcp.h>
272#include <net/xfrm.h> 273#include <net/xfrm.h>
273#include <net/ip.h> 274#include <net/ip.h>
275#include <net/ip6_route.h>
276#include <net/ipv6.h>
277#include <net/transp_v6.h>
274#include <net/netdma.h> 278#include <net/netdma.h>
275#include <net/sock.h> 279#include <net/sock.h>
276 280
@@ -1112,6 +1116,9 @@ out:
1112 if (copied) 1116 if (copied)
1113 tcp_push(sk, flags, mss_now, tp->nonagle); 1117 tcp_push(sk, flags, mss_now, tp->nonagle);
1114 release_sock(sk); 1118 release_sock(sk);
1119
1120 if (copied > 0)
1121 uid_stat_tcp_snd(current_uid(), copied);
1115 return copied; 1122 return copied;
1116 1123
1117do_fault: 1124do_fault:
@@ -1388,8 +1395,11 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
1388 tcp_rcv_space_adjust(sk); 1395 tcp_rcv_space_adjust(sk);
1389 1396
1390 /* Clean up data we have read: This will do ACK frames. */ 1397 /* Clean up data we have read: This will do ACK frames. */
1391 if (copied > 0) 1398 if (copied > 0) {
1392 tcp_cleanup_rbuf(sk, copied); 1399 tcp_cleanup_rbuf(sk, copied);
1400 uid_stat_tcp_rcv(current_uid(), copied);
1401 }
1402
1393 return copied; 1403 return copied;
1394} 1404}
1395EXPORT_SYMBOL(tcp_read_sock); 1405EXPORT_SYMBOL(tcp_read_sock);
@@ -1771,6 +1781,9 @@ skip_copy:
1771 tcp_cleanup_rbuf(sk, copied); 1781 tcp_cleanup_rbuf(sk, copied);
1772 1782
1773 release_sock(sk); 1783 release_sock(sk);
1784
1785 if (copied > 0)
1786 uid_stat_tcp_rcv(current_uid(), copied);
1774 return copied; 1787 return copied;
1775 1788
1776out: 1789out:
@@ -1779,6 +1792,8 @@ out:
1779 1792
1780recv_urg: 1793recv_urg:
1781 err = tcp_recv_urg(sk, msg, len, flags); 1794 err = tcp_recv_urg(sk, msg, len, flags);
1795 if (err > 0)
1796 uid_stat_tcp_rcv(current_uid(), err);
1782 goto out; 1797 goto out;
1783} 1798}
1784EXPORT_SYMBOL(tcp_recvmsg); 1799EXPORT_SYMBOL(tcp_recvmsg);
@@ -3310,3 +3325,107 @@ void __init tcp_init(void)
3310 tcp_secret_retiring = &tcp_secret_two; 3325 tcp_secret_retiring = &tcp_secret_two;
3311 tcp_secret_secondary = &tcp_secret_two; 3326 tcp_secret_secondary = &tcp_secret_two;
3312} 3327}
3328
3329static int tcp_is_local(struct net *net, __be32 addr) {
3330 struct rtable *rt;
3331 struct flowi4 fl4 = { .daddr = addr };
3332 rt = ip_route_output_key(net, &fl4);
3333 if (IS_ERR_OR_NULL(rt))
3334 return 0;
3335 return rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK);
3336}
3337
3338#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
3339static int tcp_is_local6(struct net *net, struct in6_addr *addr) {
3340 struct rt6_info *rt6 = rt6_lookup(net, addr, addr, 0, 0);
3341 return rt6 && rt6->rt6i_dev && (rt6->rt6i_dev->flags & IFF_LOOPBACK);
3342}
3343#endif
3344
3345/*
3346 * tcp_nuke_addr - destroy all sockets on the given local address
3347 * If the local address is the unspecified address (0.0.0.0 or ::), destroy
3348 * all sockets with local addresses that are not configured.
3349 */
3350int tcp_nuke_addr(struct net *net, struct sockaddr *addr)
3351{
3352 int family = addr->sa_family;
3353 unsigned int bucket;
3354
3355 struct in_addr *in;
3356#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
3357 struct in6_addr *in6;
3358#endif
3359 if (family == AF_INET) {
3360 in = &((struct sockaddr_in *)addr)->sin_addr;
3361#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
3362 } else if (family == AF_INET6) {
3363 in6 = &((struct sockaddr_in6 *)addr)->sin6_addr;
3364#endif
3365 } else {
3366 return -EAFNOSUPPORT;
3367 }
3368
3369 for (bucket = 0; bucket < tcp_hashinfo.ehash_mask; bucket++) {
3370 struct hlist_nulls_node *node;
3371 struct sock *sk;
3372 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, bucket);
3373
3374restart:
3375 spin_lock_bh(lock);
3376 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[bucket].chain) {
3377 struct inet_sock *inet = inet_sk(sk);
3378
3379 if (sysctl_ip_dynaddr && sk->sk_state == TCP_SYN_SENT)
3380 continue;
3381 if (sock_flag(sk, SOCK_DEAD))
3382 continue;
3383
3384 if (family == AF_INET) {
3385 __be32 s4 = inet->inet_rcv_saddr;
3386 if (s4 == LOOPBACK4_IPV6)
3387 continue;
3388
3389 if (in->s_addr != s4 &&
3390 !(in->s_addr == INADDR_ANY &&
3391 !tcp_is_local(net, s4)))
3392 continue;
3393 }
3394
3395#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
3396 if (family == AF_INET6) {
3397 struct in6_addr *s6;
3398 if (!inet->pinet6)
3399 continue;
3400
3401 s6 = &inet->pinet6->rcv_saddr;
3402 if (ipv6_addr_type(s6) == IPV6_ADDR_MAPPED)
3403 continue;
3404
3405 if (!ipv6_addr_equal(in6, s6) &&
3406 !(ipv6_addr_equal(in6, &in6addr_any) &&
3407 !tcp_is_local6(net, s6)))
3408 continue;
3409 }
3410#endif
3411
3412 sock_hold(sk);
3413 spin_unlock_bh(lock);
3414
3415 local_bh_disable();
3416 bh_lock_sock(sk);
3417 sk->sk_err = ETIMEDOUT;
3418 sk->sk_error_report(sk);
3419
3420 tcp_done(sk);
3421 bh_unlock_sock(sk);
3422 local_bh_enable();
3423 sock_put(sk);
3424
3425 goto restart;
3426 }
3427 spin_unlock_bh(lock);
3428 }
3429
3430 return 0;
3431}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index bef9f04c22b..d73aab3fbfc 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -880,6 +880,11 @@ static void tcp_init_metrics(struct sock *sk)
880 tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH); 880 tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH);
881 if (tp->snd_ssthresh > tp->snd_cwnd_clamp) 881 if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
882 tp->snd_ssthresh = tp->snd_cwnd_clamp; 882 tp->snd_ssthresh = tp->snd_cwnd_clamp;
883 } else {
 884 /* ssthresh may have been reduced unnecessarily during
 885 * 3WHS. Restore it to its initial default.
886 */
887 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
883 } 888 }
884 if (dst_metric(dst, RTAX_REORDERING) && 889 if (dst_metric(dst, RTAX_REORDERING) &&
885 tp->reordering != dst_metric(dst, RTAX_REORDERING)) { 890 tp->reordering != dst_metric(dst, RTAX_REORDERING)) {
@@ -887,10 +892,7 @@ static void tcp_init_metrics(struct sock *sk)
887 tp->reordering = dst_metric(dst, RTAX_REORDERING); 892 tp->reordering = dst_metric(dst, RTAX_REORDERING);
888 } 893 }
889 894
890 if (dst_metric(dst, RTAX_RTT) == 0) 895 if (dst_metric(dst, RTAX_RTT) == 0 || tp->srtt == 0)
891 goto reset;
892
893 if (!tp->srtt && dst_metric_rtt(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3))
894 goto reset; 896 goto reset;
895 897
896 /* Initial rtt is determined from SYN,SYN-ACK. 898 /* Initial rtt is determined from SYN,SYN-ACK.
@@ -916,19 +918,26 @@ static void tcp_init_metrics(struct sock *sk)
916 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); 918 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
917 } 919 }
918 tcp_set_rto(sk); 920 tcp_set_rto(sk);
919 if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) {
920reset: 921reset:
921 /* Play conservative. If timestamps are not 922 if (tp->srtt == 0) {
922 * supported, TCP will fail to recalculate correct 923 /* RFC2988bis: We've failed to get a valid RTT sample from
923 * rtt, if initial rto is too small. FORGET ALL AND RESET! 924 * 3WHS. This is most likely due to retransmission,
 925 * including spurious ones. Reset the RTO back to 3secs
926 * from the more aggressive 1sec to avoid more spurious
927 * retransmission.
924 */ 928 */
925 if (!tp->rx_opt.saw_tstamp && tp->srtt) { 929 tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK;
926 tp->srtt = 0; 930 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK;
927 tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
928 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
929 }
930 } 931 }
931 tp->snd_cwnd = tcp_init_cwnd(tp, dst); 932 /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been
933 * retransmitted. In light of RFC2988bis' more aggressive 1sec
934 * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK
935 * retransmission has occurred.
936 */
937 if (tp->total_retrans > 1)
938 tp->snd_cwnd = 1;
939 else
940 tp->snd_cwnd = tcp_init_cwnd(tp, dst);
932 tp->snd_cwnd_stamp = tcp_time_stamp; 941 tp->snd_cwnd_stamp = tcp_time_stamp;
933} 942}
934 943
@@ -1115,7 +1124,7 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
1115 return 0; 1124 return 0;
1116 1125
1117 /* ...Then it's D-SACK, and must reside below snd_una completely */ 1126 /* ...Then it's D-SACK, and must reside below snd_una completely */
1118 if (!after(end_seq, tp->snd_una)) 1127 if (after(end_seq, tp->snd_una))
1119 return 0; 1128 return 0;
1120 1129
1121 if (!before(start_seq, tp->undo_marker)) 1130 if (!before(start_seq, tp->undo_marker))
@@ -1380,9 +1389,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1380 1389
1381 BUG_ON(!pcount); 1390 BUG_ON(!pcount);
1382 1391
1383 /* Tweak before seqno plays */ 1392 if (skb == tp->lost_skb_hint)
1384 if (!tcp_is_fack(tp) && tcp_is_sack(tp) && tp->lost_skb_hint &&
1385 !before(TCP_SKB_CB(tp->lost_skb_hint)->seq, TCP_SKB_CB(skb)->seq))
1386 tp->lost_cnt_hint += pcount; 1393 tp->lost_cnt_hint += pcount;
1387 1394
1388 TCP_SKB_CB(prev)->end_seq += shifted; 1395 TCP_SKB_CB(prev)->end_seq += shifted;
@@ -3112,12 +3119,13 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
3112 tcp_xmit_retransmit_queue(sk); 3119 tcp_xmit_retransmit_queue(sk);
3113} 3120}
3114 3121
3115static void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt) 3122void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)
3116{ 3123{
3117 tcp_rtt_estimator(sk, seq_rtt); 3124 tcp_rtt_estimator(sk, seq_rtt);
3118 tcp_set_rto(sk); 3125 tcp_set_rto(sk);
3119 inet_csk(sk)->icsk_backoff = 0; 3126 inet_csk(sk)->icsk_backoff = 0;
3120} 3127}
3128EXPORT_SYMBOL(tcp_valid_rtt_meas);
3121 3129
3122/* Read draft-ietf-tcplw-high-performance before mucking 3130/* Read draft-ietf-tcplw-high-performance before mucking
3123 * with this code. (Supersedes RFC1323) 3131 * with this code. (Supersedes RFC1323)
@@ -5806,12 +5814,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5806 tp->rx_opt.snd_wscale; 5814 tp->rx_opt.snd_wscale;
5807 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); 5815 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
5808 5816
5809 /* tcp_ack considers this ACK as duplicate
5810 * and does not calculate rtt.
5811 * Force it here.
5812 */
5813 tcp_ack_update_rtt(sk, 0, 0);
5814
5815 if (tp->rx_opt.tstamp_ok) 5817 if (tp->rx_opt.tstamp_ok)
5816 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; 5818 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
5817 5819
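Taken together, the tcp_init_metrics() hunks implement a small post-handshake policy: with no valid RTT sample after the 3WHS, fall back to the conservative 3 s RTO, and clamp cwnd to 1 only when more than one SYN/SYN-ACK retransmission occurred. A plain-C restatement (standalone sketch, HZ assumed):

#define HZ 100				/* assumed */
#define TCP_TIMEOUT_FALLBACK (3 * HZ)	/* conservative 3 s RTO */

static void post_3whs_policy(unsigned int srtt, unsigned int total_retrans,
			     unsigned int init_cwnd,
			     unsigned int *rto, unsigned int *cwnd)
{
	if (srtt == 0)			/* no valid RTT sample from the 3WHS */
		*rto = TCP_TIMEOUT_FALLBACK;
	if (total_retrans > 1)		/* SYN or SYN-ACK was retransmitted */
		*cwnd = 1;
	else
		*cwnd = init_cwnd;
}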
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 708dc203b03..6cdf6a28f6b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -72,6 +72,7 @@
72#include <net/timewait_sock.h> 72#include <net/timewait_sock.h>
73#include <net/xfrm.h> 73#include <net/xfrm.h>
74#include <net/netdma.h> 74#include <net/netdma.h>
75#include <net/secure_seq.h>
75 76
76#include <linux/inet.h> 77#include <linux/inet.h>
77#include <linux/ipv6.h> 78#include <linux/ipv6.h>
@@ -429,8 +430,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
429 break; 430 break;
430 431
431 icsk->icsk_backoff--; 432 icsk->icsk_backoff--;
432 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) << 433 inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
433 icsk->icsk_backoff; 434 TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
434 tcp_bound_rto(sk); 435 tcp_bound_rto(sk);
435 436
436 skb = tcp_write_queue_head(sk); 437 skb = tcp_write_queue_head(sk);
@@ -629,7 +630,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
629 arg.iov[0].iov_len = sizeof(rep.th); 630 arg.iov[0].iov_len = sizeof(rep.th);
630 631
631#ifdef CONFIG_TCP_MD5SIG 632#ifdef CONFIG_TCP_MD5SIG
632 key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL; 633 key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : NULL;
633 if (key) { 634 if (key) {
634 rep.opt[0] = htonl((TCPOPT_NOP << 24) | 635 rep.opt[0] = htonl((TCPOPT_NOP << 24) |
635 (TCPOPT_NOP << 16) | 636 (TCPOPT_NOP << 16) |
@@ -807,20 +808,38 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
807 kfree(inet_rsk(req)->opt); 808 kfree(inet_rsk(req)->opt);
808} 809}
809 810
810static void syn_flood_warning(const struct sk_buff *skb) 811/*
812 * Return 1 if a syncookie should be sent
813 */
814int tcp_syn_flood_action(struct sock *sk,
815 const struct sk_buff *skb,
816 const char *proto)
811{ 817{
812 const char *msg; 818 const char *msg = "Dropping request";
819 int want_cookie = 0;
820 struct listen_sock *lopt;
821
822
813 823
814#ifdef CONFIG_SYN_COOKIES 824#ifdef CONFIG_SYN_COOKIES
815 if (sysctl_tcp_syncookies) 825 if (sysctl_tcp_syncookies) {
816 msg = "Sending cookies"; 826 msg = "Sending cookies";
817 else 827 want_cookie = 1;
828 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
829 } else
818#endif 830#endif
819 msg = "Dropping request"; 831 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
820 832
821 pr_info("TCP: Possible SYN flooding on port %d. %s.\n", 833 lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
822 ntohs(tcp_hdr(skb)->dest), msg); 834 if (!lopt->synflood_warned) {
835 lopt->synflood_warned = 1;
836 pr_info("%s: Possible SYN flooding on port %d. %s. "
837 " Check SNMP counters.\n",
838 proto, ntohs(tcp_hdr(skb)->dest), msg);
839 }
840 return want_cookie;
823} 841}
842EXPORT_SYMBOL(tcp_syn_flood_action);
824 843
825/* 844/*
826 * Save and compile IPv4 options into the request_sock if needed. 845 * Save and compile IPv4 options into the request_sock if needed.
@@ -908,18 +927,21 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
908 } 927 }
909 sk_nocaps_add(sk, NETIF_F_GSO_MASK); 928 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
910 } 929 }
911 if (tcp_alloc_md5sig_pool(sk) == NULL) { 930
931 md5sig = tp->md5sig_info;
932 if (md5sig->entries4 == 0 &&
933 tcp_alloc_md5sig_pool(sk) == NULL) {
912 kfree(newkey); 934 kfree(newkey);
913 return -ENOMEM; 935 return -ENOMEM;
914 } 936 }
915 md5sig = tp->md5sig_info;
916 937
917 if (md5sig->alloced4 == md5sig->entries4) { 938 if (md5sig->alloced4 == md5sig->entries4) {
918 keys = kmalloc((sizeof(*keys) * 939 keys = kmalloc((sizeof(*keys) *
919 (md5sig->entries4 + 1)), GFP_ATOMIC); 940 (md5sig->entries4 + 1)), GFP_ATOMIC);
920 if (!keys) { 941 if (!keys) {
921 kfree(newkey); 942 kfree(newkey);
922 tcp_free_md5sig_pool(); 943 if (md5sig->entries4 == 0)
944 tcp_free_md5sig_pool();
923 return -ENOMEM; 945 return -ENOMEM;
924 } 946 }
925 947
@@ -963,6 +985,7 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
963 kfree(tp->md5sig_info->keys4); 985 kfree(tp->md5sig_info->keys4);
964 tp->md5sig_info->keys4 = NULL; 986 tp->md5sig_info->keys4 = NULL;
965 tp->md5sig_info->alloced4 = 0; 987 tp->md5sig_info->alloced4 = 0;
988 tcp_free_md5sig_pool();
966 } else if (tp->md5sig_info->entries4 != i) { 989 } else if (tp->md5sig_info->entries4 != i) {
967 /* Need to do some manipulation */ 990 /* Need to do some manipulation */
968 memmove(&tp->md5sig_info->keys4[i], 991 memmove(&tp->md5sig_info->keys4[i],
@@ -970,7 +993,6 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
970 (tp->md5sig_info->entries4 - i) * 993 (tp->md5sig_info->entries4 - i) *
971 sizeof(struct tcp4_md5sig_key)); 994 sizeof(struct tcp4_md5sig_key));
972 } 995 }
973 tcp_free_md5sig_pool();
974 return 0; 996 return 0;
975 } 997 }
976 } 998 }
@@ -1234,11 +1256,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1234 __be32 saddr = ip_hdr(skb)->saddr; 1256 __be32 saddr = ip_hdr(skb)->saddr;
1235 __be32 daddr = ip_hdr(skb)->daddr; 1257 __be32 daddr = ip_hdr(skb)->daddr;
1236 __u32 isn = TCP_SKB_CB(skb)->when; 1258 __u32 isn = TCP_SKB_CB(skb)->when;
1237#ifdef CONFIG_SYN_COOKIES
1238 int want_cookie = 0; 1259 int want_cookie = 0;
1239#else
1240#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
1241#endif
1242 1260
1243 /* Never answer to SYNs send to broadcast or multicast */ 1261 /* Never answer to SYNs send to broadcast or multicast */
1244 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) 1262 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
@@ -1249,14 +1267,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1249 * evidently real one. 1267 * evidently real one.
1250 */ 1268 */
1251 if (inet_csk_reqsk_queue_is_full(sk) && !isn) { 1269 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1252 if (net_ratelimit()) 1270 want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
1253 syn_flood_warning(skb); 1271 if (!want_cookie)
1254#ifdef CONFIG_SYN_COOKIES 1272 goto drop;
1255 if (sysctl_tcp_syncookies) {
1256 want_cookie = 1;
1257 } else
1258#endif
1259 goto drop;
1260 } 1273 }
1261 1274
1262 /* Accept backlog is full. If we have already queued enough 1275 /* Accept backlog is full. If we have already queued enough
@@ -1302,9 +1315,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1302 while (l-- > 0) 1315 while (l-- > 0)
1303 *c++ ^= *hash_location++; 1316 *c++ ^= *hash_location++;
1304 1317
1305#ifdef CONFIG_SYN_COOKIES
1306 want_cookie = 0; /* not our kind of cookie */ 1318 want_cookie = 0; /* not our kind of cookie */
1307#endif
1308 tmp_ext.cookie_out_never = 0; /* false */ 1319 tmp_ext.cookie_out_never = 0; /* false */
1309 tmp_ext.cookie_plus = tmp_opt.cookie_plus; 1320 tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1310 } else if (!tp->rx_opt.cookie_in_always) { 1321 } else if (!tp->rx_opt.cookie_in_always) {
@@ -1384,6 +1395,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1384 isn = tcp_v4_init_sequence(skb); 1395 isn = tcp_v4_init_sequence(skb);
1385 } 1396 }
1386 tcp_rsk(req)->snt_isn = isn; 1397 tcp_rsk(req)->snt_isn = isn;
1398 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1387 1399
1388 if (tcp_v4_send_synack(sk, dst, req, 1400 if (tcp_v4_send_synack(sk, dst, req,
1389 (struct request_values *)&tmp_ext) || 1401 (struct request_values *)&tmp_ext) ||
@@ -1458,6 +1470,10 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1458 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; 1470 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1459 1471
1460 tcp_initialize_rcv_mss(newsk); 1472 tcp_initialize_rcv_mss(newsk);
1473 if (tcp_rsk(req)->snt_synack)
1474 tcp_valid_rtt_meas(newsk,
1475 tcp_time_stamp - tcp_rsk(req)->snt_synack);
1476 newtp->total_retrans = req->retrans;
1461 1477
1462#ifdef CONFIG_TCP_MD5SIG 1478#ifdef CONFIG_TCP_MD5SIG
1463 /* Copy over the MD5 key from the original socket */ 1479 /* Copy over the MD5 key from the original socket */
@@ -1855,7 +1871,7 @@ static int tcp_v4_init_sock(struct sock *sk)
1855 * algorithms that we must have the following bandaid to talk 1871 * algorithms that we must have the following bandaid to talk
1856 * efficiently to them. -DaveM 1872 * efficiently to them. -DaveM
1857 */ 1873 */
1858 tp->snd_cwnd = 2; 1874 tp->snd_cwnd = TCP_INIT_CWND;
1859 1875
1860 /* See draft-stevens-tcpca-spec-01 for discussion of the 1876 /* See draft-stevens-tcpca-spec-01 for discussion of the
1861 * initialization of these values. 1877 * initialization of these values.
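The MD5 hunks above tie the shared signature pool's lifetime to whether a socket actually holds keys: allocate only when the first key is added (entries4 == 0) and release only when removal leaves the socket keyless, instead of on every delete. A hedged restatement of that invariant (standalone C, hypothetical names):

/* The shared pool is referenced once per socket holding keys, so only
 * the 0 -> 1 and 1 -> 0 transitions of the key count may touch it. */
static int pool_refs_delta(unsigned int before, unsigned int after)
{
	if (before == 0 && after > 0)
		return +1;	/* tcp_alloc_md5sig_pool() */
	if (before > 0 && after == 0)
		return -1;	/* tcp_free_md5sig_pool() */
	return 0;		/* add/del with other keys remaining: no-op */
}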
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 80b1f80759a..0ce3d06dce6 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -328,6 +328,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
328 struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); 328 struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
329 const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); 329 const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
330 330
331 tw->tw_transparent = inet_sk(sk)->transparent;
331 tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; 332 tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale;
332 tcptw->tw_rcv_nxt = tp->rcv_nxt; 333 tcptw->tw_rcv_nxt = tp->rcv_nxt;
333 tcptw->tw_snd_nxt = tp->snd_nxt; 334 tcptw->tw_snd_nxt = tp->snd_nxt;
@@ -486,7 +487,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
486 * algorithms that we must have the following bandaid to talk 487 * algorithms that we must have the following bandaid to talk
487 * efficiently to them. -DaveM 488 * efficiently to them. -DaveM
488 */ 489 */
489 newtp->snd_cwnd = 2; 490 newtp->snd_cwnd = TCP_INIT_CWND;
490 newtp->snd_cwnd_cnt = 0; 491 newtp->snd_cwnd_cnt = 0;
491 newtp->bytes_acked = 0; 492 newtp->bytes_acked = 0;
492 493
@@ -720,6 +721,10 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
720 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); 721 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
721 return NULL; 722 return NULL;
722 } 723 }
724 if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr)
725 tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr;
726 else if (req->retrans) /* don't take RTT sample if retrans && ~TS */
727 tcp_rsk(req)->snt_synack = 0;
723 728
724 /* OK, ACK is valid, create big socket and 729 /* OK, ACK is valid, create big socket and
725 * feed this segment to it. It will repeat all 730 * feed this segment to it. It will repeat all
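The snt_synack selection added above follows Karn's rule: prefer the peer-echoed timestamp as the send time, and if the SYN-ACK was retransmitted with no timestamp available, take no RTT sample at all, since it would be ambiguous. A plain-C sketch (hypothetical helper):

static unsigned int pick_synack_stamp(int saw_tstamp, unsigned int rcv_tsecr,
				      int retrans, unsigned int snt_synack)
{
	if (saw_tstamp && rcv_tsecr)
		return rcv_tsecr;	/* exact send time, echoed by the peer */
	if (retrans)
		return 0;		/* Karn: never sample a retransmit */
	return snt_synack;		/* unretransmitted: local stamp is safe */
}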
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 882e0b0964d..faf257b9415 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1134,11 +1134,9 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
1134 sk_mem_uncharge(sk, len); 1134 sk_mem_uncharge(sk, len);
1135 sock_set_flag(sk, SOCK_QUEUE_SHRUNK); 1135 sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
1136 1136
1137 /* Any change of skb->len requires recalculation of tso 1137 /* Any change of skb->len requires recalculation of tso factor. */
1138 * factor and mss.
1139 */
1140 if (tcp_skb_pcount(skb) > 1) 1138 if (tcp_skb_pcount(skb) > 1)
1141 tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk)); 1139 tcp_set_skb_tso_segs(sk, skb, tcp_skb_mss(skb));
1142 1140
1143 return 0; 1141 return 0;
1144} 1142}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 198f75b7bdd..1b5a19340a9 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -105,6 +105,7 @@
105#include <net/route.h> 105#include <net/route.h>
106#include <net/checksum.h> 106#include <net/checksum.h>
107#include <net/xfrm.h> 107#include <net/xfrm.h>
108#include <trace/events/udp.h>
108#include "udp_impl.h" 109#include "udp_impl.h"
109 110
110struct udp_table udp_table __read_mostly; 111struct udp_table udp_table __read_mostly;
@@ -1366,6 +1367,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1366 is_udplite); 1367 is_udplite);
1367 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); 1368 UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
1368 kfree_skb(skb); 1369 kfree_skb(skb);
1370 trace_udp_fail_queue_rcv_skb(rc, sk);
1369 return -1; 1371 return -1;
1370 } 1372 }
1371 1373
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 981e43eaf70..a0b4c5da8d4 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -79,13 +79,13 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
79 struct rtable *rt = (struct rtable *)xdst->route; 79 struct rtable *rt = (struct rtable *)xdst->route;
80 const struct flowi4 *fl4 = &fl->u.ip4; 80 const struct flowi4 *fl4 = &fl->u.ip4;
81 81
82 rt->rt_key_dst = fl4->daddr; 82 xdst->u.rt.rt_key_dst = fl4->daddr;
83 rt->rt_key_src = fl4->saddr; 83 xdst->u.rt.rt_key_src = fl4->saddr;
84 rt->rt_key_tos = fl4->flowi4_tos; 84 xdst->u.rt.rt_key_tos = fl4->flowi4_tos;
85 rt->rt_route_iif = fl4->flowi4_iif; 85 xdst->u.rt.rt_route_iif = fl4->flowi4_iif;
86 rt->rt_iif = fl4->flowi4_iif; 86 xdst->u.rt.rt_iif = fl4->flowi4_iif;
87 rt->rt_oif = fl4->flowi4_oif; 87 xdst->u.rt.rt_oif = fl4->flowi4_oif;
88 rt->rt_mark = fl4->flowi4_mark; 88 xdst->u.rt.rt_mark = fl4->flowi4_mark;
89 89
90 xdst->u.dst.dev = dev; 90 xdst->u.dst.dev = dev;
91 dev_hold(dev); 91 dev_hold(dev);
@@ -117,7 +117,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
117 memset(fl4, 0, sizeof(struct flowi4)); 117 memset(fl4, 0, sizeof(struct flowi4));
118 fl4->flowi4_mark = skb->mark; 118 fl4->flowi4_mark = skb->mark;
119 119
120 if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { 120 if (!ip_is_fragment(iph)) {
121 switch (iph->protocol) { 121 switch (iph->protocol) {
122 case IPPROTO_UDP: 122 case IPPROTO_UDP:
123 case IPPROTO_UDPLITE: 123 case IPPROTO_UDPLITE: