diff options
Diffstat (limited to 'net/ipv4')
47 files changed, 1037 insertions, 767 deletions
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index f2dc69cffb5..681084d76a9 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile | |||
@@ -14,6 +14,7 @@ obj-y := route.o inetpeer.o protocol.o \ | |||
14 | inet_fragment.o ping.o | 14 | inet_fragment.o ping.o |
15 | 15 | ||
16 | obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o | 16 | obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o |
17 | obj-$(CONFIG_SYSFS) += sysfs_net_ipv4.o | ||
17 | obj-$(CONFIG_PROC_FS) += proc.o | 18 | obj-$(CONFIG_PROC_FS) += proc.o |
18 | obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o | 19 | obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o |
19 | obj-$(CONFIG_IP_MROUTE) += ipmr.o | 20 | obj-$(CONFIG_IP_MROUTE) += ipmr.o |
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index ef1528af7ab..bf488051a8d 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c | |||
@@ -118,6 +118,19 @@ | |||
118 | #include <linux/mroute.h> | 118 | #include <linux/mroute.h> |
119 | #endif | 119 | #endif |
120 | 120 | ||
121 | #ifdef CONFIG_ANDROID_PARANOID_NETWORK | ||
122 | #include <linux/android_aid.h> | ||
123 | |||
124 | static inline int current_has_network(void) | ||
125 | { | ||
126 | return in_egroup_p(AID_INET) || capable(CAP_NET_RAW); | ||
127 | } | ||
128 | #else | ||
129 | static inline int current_has_network(void) | ||
130 | { | ||
131 | return 1; | ||
132 | } | ||
133 | #endif | ||
121 | 134 | ||
122 | /* The inetsw table contains everything that inet_create needs to | 135 | /* The inetsw table contains everything that inet_create needs to |
123 | * build a new socket. | 136 | * build a new socket. |
@@ -258,6 +271,7 @@ static inline int inet_netns_ok(struct net *net, int protocol) | |||
258 | return ipprot->netns_ok; | 271 | return ipprot->netns_ok; |
259 | } | 272 | } |
260 | 273 | ||
274 | |||
261 | /* | 275 | /* |
262 | * Create an inet socket. | 276 | * Create an inet socket. |
263 | */ | 277 | */ |
@@ -274,6 +288,9 @@ static int inet_create(struct net *net, struct socket *sock, int protocol, | |||
274 | int try_loading_module = 0; | 288 | int try_loading_module = 0; |
275 | int err; | 289 | int err; |
276 | 290 | ||
291 | if (!current_has_network()) | ||
292 | return -EACCES; | ||
293 | |||
277 | if (unlikely(!inet_ehash_secret)) | 294 | if (unlikely(!inet_ehash_secret)) |
278 | if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) | 295 | if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) |
279 | build_ehash_secret(); | 296 | build_ehash_secret(); |
@@ -466,8 +483,13 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) | |||
466 | goto out; | 483 | goto out; |
467 | 484 | ||
468 | if (addr->sin_family != AF_INET) { | 485 | if (addr->sin_family != AF_INET) { |
486 | /* Compatibility games : accept AF_UNSPEC (mapped to AF_INET) | ||
487 | * only if s_addr is INADDR_ANY. | ||
488 | */ | ||
469 | err = -EAFNOSUPPORT; | 489 | err = -EAFNOSUPPORT; |
470 | goto out; | 490 | if (addr->sin_family != AF_UNSPEC || |
491 | addr->sin_addr.s_addr != htonl(INADDR_ANY)) | ||
492 | goto out; | ||
471 | } | 493 | } |
472 | 494 | ||
473 | chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); | 495 | chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); |
@@ -874,6 +896,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | |||
874 | case SIOCSIFPFLAGS: | 896 | case SIOCSIFPFLAGS: |
875 | case SIOCGIFPFLAGS: | 897 | case SIOCGIFPFLAGS: |
876 | case SIOCSIFFLAGS: | 898 | case SIOCSIFFLAGS: |
899 | case SIOCKILLADDR: | ||
877 | err = devinet_ioctl(net, cmd, (void __user *)arg); | 900 | err = devinet_ioctl(net, cmd, (void __user *)arg); |
878 | break; | 901 | break; |
879 | default: | 902 | default: |
@@ -1440,11 +1463,11 @@ EXPORT_SYMBOL_GPL(inet_ctl_sock_create); | |||
1440 | unsigned long snmp_fold_field(void __percpu *mib[], int offt) | 1463 | unsigned long snmp_fold_field(void __percpu *mib[], int offt) |
1441 | { | 1464 | { |
1442 | unsigned long res = 0; | 1465 | unsigned long res = 0; |
1443 | int i; | 1466 | int i, j; |
1444 | 1467 | ||
1445 | for_each_possible_cpu(i) { | 1468 | for_each_possible_cpu(i) { |
1446 | res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt); | 1469 | for (j = 0; j < SNMP_ARRAY_SZ; j++) |
1447 | res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt); | 1470 | res += *(((unsigned long *) per_cpu_ptr(mib[j], i)) + offt); |
1448 | } | 1471 | } |
1449 | return res; | 1472 | return res; |
1450 | } | 1473 | } |
@@ -1458,28 +1481,19 @@ u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset) | |||
1458 | int cpu; | 1481 | int cpu; |
1459 | 1482 | ||
1460 | for_each_possible_cpu(cpu) { | 1483 | for_each_possible_cpu(cpu) { |
1461 | void *bhptr, *userptr; | 1484 | void *bhptr; |
1462 | struct u64_stats_sync *syncp; | 1485 | struct u64_stats_sync *syncp; |
1463 | u64 v_bh, v_user; | 1486 | u64 v; |
1464 | unsigned int start; | 1487 | unsigned int start; |
1465 | 1488 | ||
1466 | /* first mib used by softirq context, we must use _bh() accessors */ | 1489 | bhptr = per_cpu_ptr(mib[0], cpu); |
1467 | bhptr = per_cpu_ptr(SNMP_STAT_BHPTR(mib), cpu); | ||
1468 | syncp = (struct u64_stats_sync *)(bhptr + syncp_offset); | 1490 | syncp = (struct u64_stats_sync *)(bhptr + syncp_offset); |
1469 | do { | 1491 | do { |
1470 | start = u64_stats_fetch_begin_bh(syncp); | 1492 | start = u64_stats_fetch_begin_bh(syncp); |
1471 | v_bh = *(((u64 *) bhptr) + offt); | 1493 | v = *(((u64 *) bhptr) + offt); |
1472 | } while (u64_stats_fetch_retry_bh(syncp, start)); | 1494 | } while (u64_stats_fetch_retry_bh(syncp, start)); |
1473 | 1495 | ||
1474 | /* second mib used in USER context */ | 1496 | res += v; |
1475 | userptr = per_cpu_ptr(SNMP_STAT_USRPTR(mib), cpu); | ||
1476 | syncp = (struct u64_stats_sync *)(userptr + syncp_offset); | ||
1477 | do { | ||
1478 | start = u64_stats_fetch_begin(syncp); | ||
1479 | v_user = *(((u64 *) userptr) + offt); | ||
1480 | } while (u64_stats_fetch_retry(syncp, start)); | ||
1481 | |||
1482 | res += v_bh + v_user; | ||
1483 | } | 1497 | } |
1484 | return res; | 1498 | return res; |
1485 | } | 1499 | } |
@@ -1491,25 +1505,28 @@ int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align) | |||
1491 | BUG_ON(ptr == NULL); | 1505 | BUG_ON(ptr == NULL); |
1492 | ptr[0] = __alloc_percpu(mibsize, align); | 1506 | ptr[0] = __alloc_percpu(mibsize, align); |
1493 | if (!ptr[0]) | 1507 | if (!ptr[0]) |
1494 | goto err0; | 1508 | return -ENOMEM; |
1509 | #if SNMP_ARRAY_SZ == 2 | ||
1495 | ptr[1] = __alloc_percpu(mibsize, align); | 1510 | ptr[1] = __alloc_percpu(mibsize, align); |
1496 | if (!ptr[1]) | 1511 | if (!ptr[1]) { |
1497 | goto err1; | 1512 | free_percpu(ptr[0]); |
1513 | ptr[0] = NULL; | ||
1514 | return -ENOMEM; | ||
1515 | } | ||
1516 | #endif | ||
1498 | return 0; | 1517 | return 0; |
1499 | err1: | ||
1500 | free_percpu(ptr[0]); | ||
1501 | ptr[0] = NULL; | ||
1502 | err0: | ||
1503 | return -ENOMEM; | ||
1504 | } | 1518 | } |
1505 | EXPORT_SYMBOL_GPL(snmp_mib_init); | 1519 | EXPORT_SYMBOL_GPL(snmp_mib_init); |
1506 | 1520 | ||
1507 | void snmp_mib_free(void __percpu *ptr[2]) | 1521 | void snmp_mib_free(void __percpu *ptr[SNMP_ARRAY_SZ]) |
1508 | { | 1522 | { |
1523 | int i; | ||
1524 | |||
1509 | BUG_ON(ptr == NULL); | 1525 | BUG_ON(ptr == NULL); |
1510 | free_percpu(ptr[0]); | 1526 | for (i = 0; i < SNMP_ARRAY_SZ; i++) { |
1511 | free_percpu(ptr[1]); | 1527 | free_percpu(ptr[i]); |
1512 | ptr[0] = ptr[1] = NULL; | 1528 | ptr[i] = NULL; |
1529 | } | ||
1513 | } | 1530 | } |
1514 | EXPORT_SYMBOL_GPL(snmp_mib_free); | 1531 | EXPORT_SYMBOL_GPL(snmp_mib_free); |
1515 | 1532 | ||
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index c1f4154552f..36d14406261 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c | |||
@@ -136,8 +136,6 @@ static void ah_output_done(struct crypto_async_request *base, int err) | |||
136 | memcpy(top_iph+1, iph+1, top_iph->ihl*4 - sizeof(struct iphdr)); | 136 | memcpy(top_iph+1, iph+1, top_iph->ihl*4 - sizeof(struct iphdr)); |
137 | } | 137 | } |
138 | 138 | ||
139 | err = ah->nexthdr; | ||
140 | |||
141 | kfree(AH_SKB_CB(skb)->tmp); | 139 | kfree(AH_SKB_CB(skb)->tmp); |
142 | xfrm_output_resume(skb, err); | 140 | xfrm_output_resume(skb, err); |
143 | } | 141 | } |
@@ -264,12 +262,12 @@ static void ah_input_done(struct crypto_async_request *base, int err) | |||
264 | if (err) | 262 | if (err) |
265 | goto out; | 263 | goto out; |
266 | 264 | ||
265 | err = ah->nexthdr; | ||
266 | |||
267 | skb->network_header += ah_hlen; | 267 | skb->network_header += ah_hlen; |
268 | memcpy(skb_network_header(skb), work_iph, ihl); | 268 | memcpy(skb_network_header(skb), work_iph, ihl); |
269 | __skb_pull(skb, ah_hlen + ihl); | 269 | __skb_pull(skb, ah_hlen + ihl); |
270 | skb_set_transport_header(skb, -ihl); | 270 | skb_set_transport_header(skb, -ihl); |
271 | |||
272 | err = ah->nexthdr; | ||
273 | out: | 271 | out: |
274 | kfree(AH_SKB_CB(skb)->tmp); | 272 | kfree(AH_SKB_CB(skb)->tmp); |
275 | xfrm_input_resume(skb, err); | 273 | xfrm_input_resume(skb, err); |
@@ -371,8 +369,6 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) | |||
371 | if (err == -EINPROGRESS) | 369 | if (err == -EINPROGRESS) |
372 | goto out; | 370 | goto out; |
373 | 371 | ||
374 | if (err == -EBUSY) | ||
375 | err = NET_XMIT_DROP; | ||
376 | goto out_free; | 372 | goto out_free; |
377 | } | 373 | } |
378 | 374 | ||
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 1b74d3b6437..96a164aa136 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c | |||
@@ -97,7 +97,6 @@ | |||
97 | #include <linux/init.h> | 97 | #include <linux/init.h> |
98 | #include <linux/net.h> | 98 | #include <linux/net.h> |
99 | #include <linux/rcupdate.h> | 99 | #include <linux/rcupdate.h> |
100 | #include <linux/jhash.h> | ||
101 | #include <linux/slab.h> | 100 | #include <linux/slab.h> |
102 | #ifdef CONFIG_SYSCTL | 101 | #ifdef CONFIG_SYSCTL |
103 | #include <linux/sysctl.h> | 102 | #include <linux/sysctl.h> |
@@ -139,8 +138,6 @@ static const struct neigh_ops arp_generic_ops = { | |||
139 | .error_report = arp_error_report, | 138 | .error_report = arp_error_report, |
140 | .output = neigh_resolve_output, | 139 | .output = neigh_resolve_output, |
141 | .connected_output = neigh_connected_output, | 140 | .connected_output = neigh_connected_output, |
142 | .hh_output = dev_queue_xmit, | ||
143 | .queue_xmit = dev_queue_xmit, | ||
144 | }; | 141 | }; |
145 | 142 | ||
146 | static const struct neigh_ops arp_hh_ops = { | 143 | static const struct neigh_ops arp_hh_ops = { |
@@ -149,16 +146,12 @@ static const struct neigh_ops arp_hh_ops = { | |||
149 | .error_report = arp_error_report, | 146 | .error_report = arp_error_report, |
150 | .output = neigh_resolve_output, | 147 | .output = neigh_resolve_output, |
151 | .connected_output = neigh_resolve_output, | 148 | .connected_output = neigh_resolve_output, |
152 | .hh_output = dev_queue_xmit, | ||
153 | .queue_xmit = dev_queue_xmit, | ||
154 | }; | 149 | }; |
155 | 150 | ||
156 | static const struct neigh_ops arp_direct_ops = { | 151 | static const struct neigh_ops arp_direct_ops = { |
157 | .family = AF_INET, | 152 | .family = AF_INET, |
158 | .output = dev_queue_xmit, | 153 | .output = neigh_direct_output, |
159 | .connected_output = dev_queue_xmit, | 154 | .connected_output = neigh_direct_output, |
160 | .hh_output = dev_queue_xmit, | ||
161 | .queue_xmit = dev_queue_xmit, | ||
162 | }; | 155 | }; |
163 | 156 | ||
164 | static const struct neigh_ops arp_broken_ops = { | 157 | static const struct neigh_ops arp_broken_ops = { |
@@ -167,8 +160,6 @@ static const struct neigh_ops arp_broken_ops = { | |||
167 | .error_report = arp_error_report, | 160 | .error_report = arp_error_report, |
168 | .output = neigh_compat_output, | 161 | .output = neigh_compat_output, |
169 | .connected_output = neigh_compat_output, | 162 | .connected_output = neigh_compat_output, |
170 | .hh_output = dev_queue_xmit, | ||
171 | .queue_xmit = dev_queue_xmit, | ||
172 | }; | 163 | }; |
173 | 164 | ||
174 | struct neigh_table arp_tbl = { | 165 | struct neigh_table arp_tbl = { |
@@ -232,7 +223,7 @@ static u32 arp_hash(const void *pkey, | |||
232 | const struct net_device *dev, | 223 | const struct net_device *dev, |
233 | __u32 hash_rnd) | 224 | __u32 hash_rnd) |
234 | { | 225 | { |
235 | return jhash_2words(*(u32 *)pkey, dev->ifindex, hash_rnd); | 226 | return arp_hashfn(*(u32 *)pkey, dev, hash_rnd); |
236 | } | 227 | } |
237 | 228 | ||
238 | static int arp_constructor(struct neighbour *neigh) | 229 | static int arp_constructor(struct neighbour *neigh) |
@@ -259,7 +250,7 @@ static int arp_constructor(struct neighbour *neigh) | |||
259 | if (!dev->header_ops) { | 250 | if (!dev->header_ops) { |
260 | neigh->nud_state = NUD_NOARP; | 251 | neigh->nud_state = NUD_NOARP; |
261 | neigh->ops = &arp_direct_ops; | 252 | neigh->ops = &arp_direct_ops; |
262 | neigh->output = neigh->ops->queue_xmit; | 253 | neigh->output = neigh_direct_output; |
263 | } else { | 254 | } else { |
264 | /* Good devices (checked by reading texts, but only Ethernet is | 255 | /* Good devices (checked by reading texts, but only Ethernet is |
265 | tested) | 256 | tested) |
@@ -518,30 +509,6 @@ EXPORT_SYMBOL(arp_find); | |||
518 | 509 | ||
519 | /* END OF OBSOLETE FUNCTIONS */ | 510 | /* END OF OBSOLETE FUNCTIONS */ |
520 | 511 | ||
521 | int arp_bind_neighbour(struct dst_entry *dst) | ||
522 | { | ||
523 | struct net_device *dev = dst->dev; | ||
524 | struct neighbour *n = dst->neighbour; | ||
525 | |||
526 | if (dev == NULL) | ||
527 | return -EINVAL; | ||
528 | if (n == NULL) { | ||
529 | __be32 nexthop = ((struct rtable *)dst)->rt_gateway; | ||
530 | if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) | ||
531 | nexthop = 0; | ||
532 | n = __neigh_lookup_errno( | ||
533 | #if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) | ||
534 | dev->type == ARPHRD_ATM ? | ||
535 | clip_tbl_hook : | ||
536 | #endif | ||
537 | &arp_tbl, &nexthop, dev); | ||
538 | if (IS_ERR(n)) | ||
539 | return PTR_ERR(n); | ||
540 | dst->neighbour = n; | ||
541 | } | ||
542 | return 0; | ||
543 | } | ||
544 | |||
545 | /* | 512 | /* |
546 | * Check if we can use proxy ARP for this path | 513 | * Check if we can use proxy ARP for this path |
547 | */ | 514 | */ |
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 2b3c23c287c..2c2a98e402e 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c | |||
@@ -50,7 +50,7 @@ | |||
50 | #include <net/tcp.h> | 50 | #include <net/tcp.h> |
51 | #include <net/netlabel.h> | 51 | #include <net/netlabel.h> |
52 | #include <net/cipso_ipv4.h> | 52 | #include <net/cipso_ipv4.h> |
53 | #include <asm/atomic.h> | 53 | #include <linux/atomic.h> |
54 | #include <asm/bug.h> | 54 | #include <asm/bug.h> |
55 | #include <asm/unaligned.h> | 55 | #include <asm/unaligned.h> |
56 | 56 | ||
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 0d4a184af16..76db59202f1 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c | |||
@@ -59,6 +59,7 @@ | |||
59 | 59 | ||
60 | #include <net/arp.h> | 60 | #include <net/arp.h> |
61 | #include <net/ip.h> | 61 | #include <net/ip.h> |
62 | #include <net/tcp.h> | ||
62 | #include <net/route.h> | 63 | #include <net/route.h> |
63 | #include <net/ip_fib.h> | 64 | #include <net/ip_fib.h> |
64 | #include <net/rtnetlink.h> | 65 | #include <net/rtnetlink.h> |
@@ -735,6 +736,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) | |||
735 | case SIOCSIFBRDADDR: /* Set the broadcast address */ | 736 | case SIOCSIFBRDADDR: /* Set the broadcast address */ |
736 | case SIOCSIFDSTADDR: /* Set the destination address */ | 737 | case SIOCSIFDSTADDR: /* Set the destination address */ |
737 | case SIOCSIFNETMASK: /* Set the netmask for the interface */ | 738 | case SIOCSIFNETMASK: /* Set the netmask for the interface */ |
739 | case SIOCKILLADDR: /* Nuke all sockets on this address */ | ||
738 | ret = -EACCES; | 740 | ret = -EACCES; |
739 | if (!capable(CAP_NET_ADMIN)) | 741 | if (!capable(CAP_NET_ADMIN)) |
740 | goto out; | 742 | goto out; |
@@ -786,7 +788,8 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) | |||
786 | } | 788 | } |
787 | 789 | ||
788 | ret = -EADDRNOTAVAIL; | 790 | ret = -EADDRNOTAVAIL; |
789 | if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS) | 791 | if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS |
792 | && cmd != SIOCKILLADDR) | ||
790 | goto done; | 793 | goto done; |
791 | 794 | ||
792 | switch (cmd) { | 795 | switch (cmd) { |
@@ -912,6 +915,9 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) | |||
912 | inet_insert_ifa(ifa); | 915 | inet_insert_ifa(ifa); |
913 | } | 916 | } |
914 | break; | 917 | break; |
918 | case SIOCKILLADDR: /* Nuke all connections on this address */ | ||
919 | ret = tcp_nuke_addr(net, (struct sockaddr *) sin); | ||
920 | break; | ||
915 | } | 921 | } |
916 | done: | 922 | done: |
917 | rtnl_unlock(); | 923 | rtnl_unlock(); |
@@ -1134,15 +1140,15 @@ static void inetdev_send_gratuitous_arp(struct net_device *dev, | |||
1134 | struct in_device *in_dev) | 1140 | struct in_device *in_dev) |
1135 | 1141 | ||
1136 | { | 1142 | { |
1137 | struct in_ifaddr *ifa = in_dev->ifa_list; | 1143 | struct in_ifaddr *ifa; |
1138 | |||
1139 | if (!ifa) | ||
1140 | return; | ||
1141 | 1144 | ||
1142 | arp_send(ARPOP_REQUEST, ETH_P_ARP, | 1145 | for (ifa = in_dev->ifa_list; ifa; |
1143 | ifa->ifa_local, dev, | 1146 | ifa = ifa->ifa_next) { |
1144 | ifa->ifa_local, NULL, | 1147 | arp_send(ARPOP_REQUEST, ETH_P_ARP, |
1145 | dev->dev_addr, NULL); | 1148 | ifa->ifa_local, dev, |
1149 | ifa->ifa_local, NULL, | ||
1150 | dev->dev_addr, NULL); | ||
1151 | } | ||
1146 | } | 1152 | } |
1147 | 1153 | ||
1148 | /* Called only under RTNL semaphore */ | 1154 | /* Called only under RTNL semaphore */ |
@@ -1490,7 +1496,9 @@ static int devinet_conf_proc(ctl_table *ctl, int write, | |||
1490 | void __user *buffer, | 1496 | void __user *buffer, |
1491 | size_t *lenp, loff_t *ppos) | 1497 | size_t *lenp, loff_t *ppos) |
1492 | { | 1498 | { |
1499 | int old_value = *(int *)ctl->data; | ||
1493 | int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); | 1500 | int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); |
1501 | int new_value = *(int *)ctl->data; | ||
1494 | 1502 | ||
1495 | if (write) { | 1503 | if (write) { |
1496 | struct ipv4_devconf *cnf = ctl->extra1; | 1504 | struct ipv4_devconf *cnf = ctl->extra1; |
@@ -1501,6 +1509,9 @@ static int devinet_conf_proc(ctl_table *ctl, int write, | |||
1501 | 1509 | ||
1502 | if (cnf == net->ipv4.devconf_dflt) | 1510 | if (cnf == net->ipv4.devconf_dflt) |
1503 | devinet_copy_dflt_conf(net, i); | 1511 | devinet_copy_dflt_conf(net, i); |
1512 | if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1) | ||
1513 | if ((new_value == 0) && (old_value != 0)) | ||
1514 | rt_cache_flush(net, 0); | ||
1504 | } | 1515 | } |
1505 | 1516 | ||
1506 | return ret; | 1517 | return ret; |
@@ -1833,8 +1844,8 @@ void __init devinet_init(void) | |||
1833 | 1844 | ||
1834 | rtnl_af_register(&inet_af_ops); | 1845 | rtnl_af_register(&inet_af_ops); |
1835 | 1846 | ||
1836 | rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL); | 1847 | rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL); |
1837 | rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL); | 1848 | rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL); |
1838 | rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr); | 1849 | rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL); |
1839 | } | 1850 | } |
1840 | 1851 | ||
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 22524716fe7..92fc5f69f5d 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c | |||
@@ -1124,9 +1124,9 @@ static struct pernet_operations fib_net_ops = { | |||
1124 | 1124 | ||
1125 | void __init ip_fib_init(void) | 1125 | void __init ip_fib_init(void) |
1126 | { | 1126 | { |
1127 | rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL); | 1127 | rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL); |
1128 | rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL); | 1128 | rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL); |
1129 | rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib); | 1129 | rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL); |
1130 | 1130 | ||
1131 | register_pernet_subsys(&fib_net_ops); | 1131 | register_pernet_subsys(&fib_net_ops); |
1132 | register_netdevice_notifier(&fib_netdev_notifier); | 1132 | register_netdevice_notifier(&fib_netdev_notifier); |
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 33e2c35b74b..80106d89d54 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c | |||
@@ -142,6 +142,14 @@ const struct fib_prop fib_props[RTN_MAX + 1] = { | |||
142 | }; | 142 | }; |
143 | 143 | ||
144 | /* Release a nexthop info record */ | 144 | /* Release a nexthop info record */ |
145 | static void free_fib_info_rcu(struct rcu_head *head) | ||
146 | { | ||
147 | struct fib_info *fi = container_of(head, struct fib_info, rcu); | ||
148 | |||
149 | if (fi->fib_metrics != (u32 *) dst_default_metrics) | ||
150 | kfree(fi->fib_metrics); | ||
151 | kfree(fi); | ||
152 | } | ||
145 | 153 | ||
146 | void free_fib_info(struct fib_info *fi) | 154 | void free_fib_info(struct fib_info *fi) |
147 | { | 155 | { |
@@ -156,7 +164,7 @@ void free_fib_info(struct fib_info *fi) | |||
156 | } endfor_nexthops(fi); | 164 | } endfor_nexthops(fi); |
157 | fib_info_cnt--; | 165 | fib_info_cnt--; |
158 | release_net(fi->fib_net); | 166 | release_net(fi->fib_net); |
159 | kfree_rcu(fi, rcu); | 167 | call_rcu(&fi->rcu, free_fib_info_rcu); |
160 | } | 168 | } |
161 | 169 | ||
162 | void fib_release_info(struct fib_info *fi) | 170 | void fib_release_info(struct fib_info *fi) |
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 58c25ea5a5c..de9e2978476 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
@@ -110,9 +110,10 @@ struct leaf { | |||
110 | 110 | ||
111 | struct leaf_info { | 111 | struct leaf_info { |
112 | struct hlist_node hlist; | 112 | struct hlist_node hlist; |
113 | struct rcu_head rcu; | ||
114 | int plen; | 113 | int plen; |
114 | u32 mask_plen; /* ntohl(inet_make_mask(plen)) */ | ||
115 | struct list_head falh; | 115 | struct list_head falh; |
116 | struct rcu_head rcu; | ||
116 | }; | 117 | }; |
117 | 118 | ||
118 | struct tnode { | 119 | struct tnode { |
@@ -451,6 +452,7 @@ static struct leaf_info *leaf_info_new(int plen) | |||
451 | struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL); | 452 | struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL); |
452 | if (li) { | 453 | if (li) { |
453 | li->plen = plen; | 454 | li->plen = plen; |
455 | li->mask_plen = ntohl(inet_make_mask(plen)); | ||
454 | INIT_LIST_HEAD(&li->falh); | 456 | INIT_LIST_HEAD(&li->falh); |
455 | } | 457 | } |
456 | return li; | 458 | return li; |
@@ -1359,10 +1361,8 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, | |||
1359 | 1361 | ||
1360 | hlist_for_each_entry_rcu(li, node, hhead, hlist) { | 1362 | hlist_for_each_entry_rcu(li, node, hhead, hlist) { |
1361 | struct fib_alias *fa; | 1363 | struct fib_alias *fa; |
1362 | int plen = li->plen; | ||
1363 | __be32 mask = inet_make_mask(plen); | ||
1364 | 1364 | ||
1365 | if (l->key != (key & ntohl(mask))) | 1365 | if (l->key != (key & li->mask_plen)) |
1366 | continue; | 1366 | continue; |
1367 | 1367 | ||
1368 | list_for_each_entry_rcu(fa, &li->falh, fa_list) { | 1368 | list_for_each_entry_rcu(fa, &li->falh, fa_list) { |
@@ -1394,7 +1394,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, | |||
1394 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 1394 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
1395 | t->stats.semantic_match_passed++; | 1395 | t->stats.semantic_match_passed++; |
1396 | #endif | 1396 | #endif |
1397 | res->prefixlen = plen; | 1397 | res->prefixlen = li->plen; |
1398 | res->nh_sel = nhsel; | 1398 | res->nh_sel = nhsel; |
1399 | res->type = fa->fa_type; | 1399 | res->type = fa->fa_type; |
1400 | res->scope = fa->fa_info->fib_scope; | 1400 | res->scope = fa->fa_info->fib_scope; |
@@ -1402,7 +1402,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, | |||
1402 | res->table = tb; | 1402 | res->table = tb; |
1403 | res->fa_head = &li->falh; | 1403 | res->fa_head = &li->falh; |
1404 | if (!(fib_flags & FIB_LOOKUP_NOREF)) | 1404 | if (!(fib_flags & FIB_LOOKUP_NOREF)) |
1405 | atomic_inc(&res->fi->fib_clntref); | 1405 | atomic_inc(&fi->fib_clntref); |
1406 | return 0; | 1406 | return 0; |
1407 | } | 1407 | } |
1408 | } | 1408 | } |
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index c6933f2ea31..dbfc21de347 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c | |||
@@ -15,8 +15,8 @@ | |||
15 | #include <linux/kmod.h> | 15 | #include <linux/kmod.h> |
16 | #include <linux/skbuff.h> | 16 | #include <linux/skbuff.h> |
17 | #include <linux/in.h> | 17 | #include <linux/in.h> |
18 | #include <linux/ip.h> | ||
18 | #include <linux/netdevice.h> | 19 | #include <linux/netdevice.h> |
19 | #include <linux/version.h> | ||
20 | #include <linux/spinlock.h> | 20 | #include <linux/spinlock.h> |
21 | #include <net/protocol.h> | 21 | #include <net/protocol.h> |
22 | #include <net/gre.h> | 22 | #include <net/gre.h> |
@@ -97,27 +97,17 @@ drop: | |||
97 | static void gre_err(struct sk_buff *skb, u32 info) | 97 | static void gre_err(struct sk_buff *skb, u32 info) |
98 | { | 98 | { |
99 | const struct gre_protocol *proto; | 99 | const struct gre_protocol *proto; |
100 | u8 ver; | 100 | const struct iphdr *iph = (const struct iphdr *)skb->data; |
101 | 101 | u8 ver = skb->data[(iph->ihl<<2) + 1]&0x7f; | |
102 | if (!pskb_may_pull(skb, 12)) | ||
103 | goto drop; | ||
104 | 102 | ||
105 | ver = skb->data[1]&0x7f; | ||
106 | if (ver >= GREPROTO_MAX) | 103 | if (ver >= GREPROTO_MAX) |
107 | goto drop; | 104 | return; |
108 | 105 | ||
109 | rcu_read_lock(); | 106 | rcu_read_lock(); |
110 | proto = rcu_dereference(gre_proto[ver]); | 107 | proto = rcu_dereference(gre_proto[ver]); |
111 | if (!proto || !proto->err_handler) | 108 | if (proto && proto->err_handler) |
112 | goto drop_unlock; | 109 | proto->err_handler(skb, info); |
113 | proto->err_handler(skb, info); | ||
114 | rcu_read_unlock(); | ||
115 | return; | ||
116 | |||
117 | drop_unlock: | ||
118 | rcu_read_unlock(); | 110 | rcu_read_unlock(); |
119 | drop: | ||
120 | kfree_skb(skb); | ||
121 | } | 111 | } |
122 | 112 | ||
123 | static const struct net_protocol net_gre_protocol = { | 113 | static const struct net_protocol net_gre_protocol = { |
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 5395e45dcce..23ef31baa1a 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c | |||
@@ -380,6 +380,7 @@ static struct rtable *icmp_route_lookup(struct net *net, | |||
380 | struct icmp_bxm *param) | 380 | struct icmp_bxm *param) |
381 | { | 381 | { |
382 | struct rtable *rt, *rt2; | 382 | struct rtable *rt, *rt2; |
383 | struct flowi4 fl4_dec; | ||
383 | int err; | 384 | int err; |
384 | 385 | ||
385 | memset(fl4, 0, sizeof(*fl4)); | 386 | memset(fl4, 0, sizeof(*fl4)); |
@@ -408,19 +409,19 @@ static struct rtable *icmp_route_lookup(struct net *net, | |||
408 | } else | 409 | } else |
409 | return rt; | 410 | return rt; |
410 | 411 | ||
411 | err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(fl4), AF_INET); | 412 | err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(&fl4_dec), AF_INET); |
412 | if (err) | 413 | if (err) |
413 | goto relookup_failed; | 414 | goto relookup_failed; |
414 | 415 | ||
415 | if (inet_addr_type(net, fl4->saddr) == RTN_LOCAL) { | 416 | if (inet_addr_type(net, fl4_dec.saddr) == RTN_LOCAL) { |
416 | rt2 = __ip_route_output_key(net, fl4); | 417 | rt2 = __ip_route_output_key(net, &fl4_dec); |
417 | if (IS_ERR(rt2)) | 418 | if (IS_ERR(rt2)) |
418 | err = PTR_ERR(rt2); | 419 | err = PTR_ERR(rt2); |
419 | } else { | 420 | } else { |
420 | struct flowi4 fl4_2 = {}; | 421 | struct flowi4 fl4_2 = {}; |
421 | unsigned long orefdst; | 422 | unsigned long orefdst; |
422 | 423 | ||
423 | fl4_2.daddr = fl4->saddr; | 424 | fl4_2.daddr = fl4_dec.saddr; |
424 | rt2 = ip_route_output_key(net, &fl4_2); | 425 | rt2 = ip_route_output_key(net, &fl4_2); |
425 | if (IS_ERR(rt2)) { | 426 | if (IS_ERR(rt2)) { |
426 | err = PTR_ERR(rt2); | 427 | err = PTR_ERR(rt2); |
@@ -428,7 +429,7 @@ static struct rtable *icmp_route_lookup(struct net *net, | |||
428 | } | 429 | } |
429 | /* Ugh! */ | 430 | /* Ugh! */ |
430 | orefdst = skb_in->_skb_refdst; /* save old refdst */ | 431 | orefdst = skb_in->_skb_refdst; /* save old refdst */ |
431 | err = ip_route_input(skb_in, fl4->daddr, fl4->saddr, | 432 | err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr, |
432 | RT_TOS(tos), rt2->dst.dev); | 433 | RT_TOS(tos), rt2->dst.dev); |
433 | 434 | ||
434 | dst_release(&rt2->dst); | 435 | dst_release(&rt2->dst); |
@@ -440,10 +441,11 @@ static struct rtable *icmp_route_lookup(struct net *net, | |||
440 | goto relookup_failed; | 441 | goto relookup_failed; |
441 | 442 | ||
442 | rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst, | 443 | rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst, |
443 | flowi4_to_flowi(fl4), NULL, | 444 | flowi4_to_flowi(&fl4_dec), NULL, |
444 | XFRM_LOOKUP_ICMP); | 445 | XFRM_LOOKUP_ICMP); |
445 | if (!IS_ERR(rt2)) { | 446 | if (!IS_ERR(rt2)) { |
446 | dst_release(&rt->dst); | 447 | dst_release(&rt->dst); |
448 | memcpy(fl4, &fl4_dec, sizeof(*fl4)); | ||
447 | rt = rt2; | 449 | rt = rt2; |
448 | } else if (PTR_ERR(rt2) == -EPERM) { | 450 | } else if (PTR_ERR(rt2) == -EPERM) { |
449 | if (rt) | 451 | if (rt) |
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index f1d27f6c935..e0d42dbb33f 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c | |||
@@ -767,7 +767,7 @@ static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs) | |||
767 | break; | 767 | break; |
768 | for (i=0; i<nsrcs; i++) { | 768 | for (i=0; i<nsrcs; i++) { |
769 | /* skip inactive filters */ | 769 | /* skip inactive filters */ |
770 | if (pmc->sfcount[MCAST_INCLUDE] || | 770 | if (psf->sf_count[MCAST_INCLUDE] || |
771 | pmc->sfcount[MCAST_EXCLUDE] != | 771 | pmc->sfcount[MCAST_EXCLUDE] != |
772 | psf->sf_count[MCAST_EXCLUDE]) | 772 | psf->sf_count[MCAST_EXCLUDE]) |
773 | continue; | 773 | continue; |
@@ -875,6 +875,8 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, | |||
875 | * to be intended in a v3 query. | 875 | * to be intended in a v3 query. |
876 | */ | 876 | */ |
877 | max_delay = IGMPV3_MRC(ih3->code)*(HZ/IGMP_TIMER_SCALE); | 877 | max_delay = IGMPV3_MRC(ih3->code)*(HZ/IGMP_TIMER_SCALE); |
878 | if (!max_delay) | ||
879 | max_delay = 1; /* can't mod w/ 0 */ | ||
878 | } else { /* v3 */ | 880 | } else { /* v3 */ |
879 | if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) | 881 | if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) |
880 | return; | 882 | return; |
@@ -1718,7 +1720,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, | |||
1718 | 1720 | ||
1719 | pmc->sfcount[sfmode]--; | 1721 | pmc->sfcount[sfmode]--; |
1720 | for (j=0; j<i; j++) | 1722 | for (j=0; j<i; j++) |
1721 | (void) ip_mc_del1_src(pmc, sfmode, &psfsrc[i]); | 1723 | (void) ip_mc_del1_src(pmc, sfmode, &psfsrc[j]); |
1722 | } else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) { | 1724 | } else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) { |
1723 | #ifdef CONFIG_IP_MULTICAST | 1725 | #ifdef CONFIG_IP_MULTICAST |
1724 | struct ip_sf_list *psf; | 1726 | struct ip_sf_list *psf; |
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 3267d389843..389a2e6a17f 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c | |||
@@ -869,7 +869,7 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
869 | } | 869 | } |
870 | 870 | ||
871 | return netlink_dump_start(idiagnl, skb, nlh, | 871 | return netlink_dump_start(idiagnl, skb, nlh, |
872 | inet_diag_dump, NULL); | 872 | inet_diag_dump, NULL, 0); |
873 | } | 873 | } |
874 | 874 | ||
875 | return inet_diag_get_exact(skb, nlh); | 875 | return inet_diag_get_exact(skb, nlh); |
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 3c0369a3a66..984ec656b03 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c | |||
@@ -21,6 +21,7 @@ | |||
21 | 21 | ||
22 | #include <net/inet_connection_sock.h> | 22 | #include <net/inet_connection_sock.h> |
23 | #include <net/inet_hashtables.h> | 23 | #include <net/inet_hashtables.h> |
24 | #include <net/secure_seq.h> | ||
24 | #include <net/ip.h> | 25 | #include <net/ip.h> |
25 | 26 | ||
26 | /* | 27 | /* |
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index 85a0f75dae6..ef7ae6049a5 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c | |||
@@ -146,8 +146,7 @@ static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len) | |||
146 | } | 146 | } |
147 | 147 | ||
148 | static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb, | 148 | static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb, |
149 | struct iphdr *iph, struct tcphdr *tcph, | 149 | struct iphdr *iph, struct tcphdr *tcph) |
150 | u16 vlan_tag, struct vlan_group *vgrp) | ||
151 | { | 150 | { |
152 | int nr_frags; | 151 | int nr_frags; |
153 | __be32 *ptr; | 152 | __be32 *ptr; |
@@ -173,8 +172,6 @@ static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb, | |||
173 | } | 172 | } |
174 | 173 | ||
175 | lro_desc->mss = tcp_data_len; | 174 | lro_desc->mss = tcp_data_len; |
176 | lro_desc->vgrp = vgrp; | ||
177 | lro_desc->vlan_tag = vlan_tag; | ||
178 | lro_desc->active = 1; | 175 | lro_desc->active = 1; |
179 | 176 | ||
180 | lro_desc->data_csum = lro_tcp_data_csum(iph, tcph, | 177 | lro_desc->data_csum = lro_tcp_data_csum(iph, tcph, |
@@ -309,29 +306,17 @@ static void lro_flush(struct net_lro_mgr *lro_mgr, | |||
309 | 306 | ||
310 | skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss; | 307 | skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss; |
311 | 308 | ||
312 | if (lro_desc->vgrp) { | 309 | if (lro_mgr->features & LRO_F_NAPI) |
313 | if (lro_mgr->features & LRO_F_NAPI) | 310 | netif_receive_skb(lro_desc->parent); |
314 | vlan_hwaccel_receive_skb(lro_desc->parent, | 311 | else |
315 | lro_desc->vgrp, | 312 | netif_rx(lro_desc->parent); |
316 | lro_desc->vlan_tag); | ||
317 | else | ||
318 | vlan_hwaccel_rx(lro_desc->parent, | ||
319 | lro_desc->vgrp, | ||
320 | lro_desc->vlan_tag); | ||
321 | |||
322 | } else { | ||
323 | if (lro_mgr->features & LRO_F_NAPI) | ||
324 | netif_receive_skb(lro_desc->parent); | ||
325 | else | ||
326 | netif_rx(lro_desc->parent); | ||
327 | } | ||
328 | 313 | ||
329 | LRO_INC_STATS(lro_mgr, flushed); | 314 | LRO_INC_STATS(lro_mgr, flushed); |
330 | lro_clear_desc(lro_desc); | 315 | lro_clear_desc(lro_desc); |
331 | } | 316 | } |
332 | 317 | ||
333 | static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb, | 318 | static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb, |
334 | struct vlan_group *vgrp, u16 vlan_tag, void *priv) | 319 | void *priv) |
335 | { | 320 | { |
336 | struct net_lro_desc *lro_desc; | 321 | struct net_lro_desc *lro_desc; |
337 | struct iphdr *iph; | 322 | struct iphdr *iph; |
@@ -360,7 +345,7 @@ static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb, | |||
360 | goto out; | 345 | goto out; |
361 | 346 | ||
362 | skb->ip_summed = lro_mgr->ip_summed_aggr; | 347 | skb->ip_summed = lro_mgr->ip_summed_aggr; |
363 | lro_init_desc(lro_desc, skb, iph, tcph, vlan_tag, vgrp); | 348 | lro_init_desc(lro_desc, skb, iph, tcph); |
364 | LRO_INC_STATS(lro_mgr, aggregated); | 349 | LRO_INC_STATS(lro_mgr, aggregated); |
365 | return 0; | 350 | return 0; |
366 | } | 351 | } |
@@ -433,8 +418,7 @@ static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr, | |||
433 | static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr, | 418 | static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr, |
434 | struct skb_frag_struct *frags, | 419 | struct skb_frag_struct *frags, |
435 | int len, int true_size, | 420 | int len, int true_size, |
436 | struct vlan_group *vgrp, | 421 | void *priv, __wsum sum) |
437 | u16 vlan_tag, void *priv, __wsum sum) | ||
438 | { | 422 | { |
439 | struct net_lro_desc *lro_desc; | 423 | struct net_lro_desc *lro_desc; |
440 | struct iphdr *iph; | 424 | struct iphdr *iph; |
@@ -480,7 +464,7 @@ static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr, | |||
480 | tcph = (void *)((u8 *)skb->data + vlan_hdr_len | 464 | tcph = (void *)((u8 *)skb->data + vlan_hdr_len |
481 | + IP_HDR_LEN(iph)); | 465 | + IP_HDR_LEN(iph)); |
482 | 466 | ||
483 | lro_init_desc(lro_desc, skb, iph, tcph, 0, NULL); | 467 | lro_init_desc(lro_desc, skb, iph, tcph); |
484 | LRO_INC_STATS(lro_mgr, aggregated); | 468 | LRO_INC_STATS(lro_mgr, aggregated); |
485 | return NULL; | 469 | return NULL; |
486 | } | 470 | } |
@@ -514,7 +498,7 @@ void lro_receive_skb(struct net_lro_mgr *lro_mgr, | |||
514 | struct sk_buff *skb, | 498 | struct sk_buff *skb, |
515 | void *priv) | 499 | void *priv) |
516 | { | 500 | { |
517 | if (__lro_proc_skb(lro_mgr, skb, NULL, 0, priv)) { | 501 | if (__lro_proc_skb(lro_mgr, skb, priv)) { |
518 | if (lro_mgr->features & LRO_F_NAPI) | 502 | if (lro_mgr->features & LRO_F_NAPI) |
519 | netif_receive_skb(skb); | 503 | netif_receive_skb(skb); |
520 | else | 504 | else |
@@ -523,29 +507,13 @@ void lro_receive_skb(struct net_lro_mgr *lro_mgr, | |||
523 | } | 507 | } |
524 | EXPORT_SYMBOL(lro_receive_skb); | 508 | EXPORT_SYMBOL(lro_receive_skb); |
525 | 509 | ||
526 | void lro_vlan_hwaccel_receive_skb(struct net_lro_mgr *lro_mgr, | ||
527 | struct sk_buff *skb, | ||
528 | struct vlan_group *vgrp, | ||
529 | u16 vlan_tag, | ||
530 | void *priv) | ||
531 | { | ||
532 | if (__lro_proc_skb(lro_mgr, skb, vgrp, vlan_tag, priv)) { | ||
533 | if (lro_mgr->features & LRO_F_NAPI) | ||
534 | vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag); | ||
535 | else | ||
536 | vlan_hwaccel_rx(skb, vgrp, vlan_tag); | ||
537 | } | ||
538 | } | ||
539 | EXPORT_SYMBOL(lro_vlan_hwaccel_receive_skb); | ||
540 | |||
541 | void lro_receive_frags(struct net_lro_mgr *lro_mgr, | 510 | void lro_receive_frags(struct net_lro_mgr *lro_mgr, |
542 | struct skb_frag_struct *frags, | 511 | struct skb_frag_struct *frags, |
543 | int len, int true_size, void *priv, __wsum sum) | 512 | int len, int true_size, void *priv, __wsum sum) |
544 | { | 513 | { |
545 | struct sk_buff *skb; | 514 | struct sk_buff *skb; |
546 | 515 | ||
547 | skb = __lro_proc_segment(lro_mgr, frags, len, true_size, NULL, 0, | 516 | skb = __lro_proc_segment(lro_mgr, frags, len, true_size, priv, sum); |
548 | priv, sum); | ||
549 | if (!skb) | 517 | if (!skb) |
550 | return; | 518 | return; |
551 | 519 | ||
@@ -556,26 +524,6 @@ void lro_receive_frags(struct net_lro_mgr *lro_mgr, | |||
556 | } | 524 | } |
557 | EXPORT_SYMBOL(lro_receive_frags); | 525 | EXPORT_SYMBOL(lro_receive_frags); |
558 | 526 | ||
559 | void lro_vlan_hwaccel_receive_frags(struct net_lro_mgr *lro_mgr, | ||
560 | struct skb_frag_struct *frags, | ||
561 | int len, int true_size, | ||
562 | struct vlan_group *vgrp, | ||
563 | u16 vlan_tag, void *priv, __wsum sum) | ||
564 | { | ||
565 | struct sk_buff *skb; | ||
566 | |||
567 | skb = __lro_proc_segment(lro_mgr, frags, len, true_size, vgrp, | ||
568 | vlan_tag, priv, sum); | ||
569 | if (!skb) | ||
570 | return; | ||
571 | |||
572 | if (lro_mgr->features & LRO_F_NAPI) | ||
573 | vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag); | ||
574 | else | ||
575 | vlan_hwaccel_rx(skb, vgrp, vlan_tag); | ||
576 | } | ||
577 | EXPORT_SYMBOL(lro_vlan_hwaccel_receive_frags); | ||
578 | |||
579 | void lro_flush_all(struct net_lro_mgr *lro_mgr) | 527 | void lro_flush_all(struct net_lro_mgr *lro_mgr) |
580 | { | 528 | { |
581 | int i; | 529 | int i; |
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index ce616d92cc5..86f13c67ea8 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/net.h> | 19 | #include <linux/net.h> |
20 | #include <net/ip.h> | 20 | #include <net/ip.h> |
21 | #include <net/inetpeer.h> | 21 | #include <net/inetpeer.h> |
22 | #include <net/secure_seq.h> | ||
22 | 23 | ||
23 | /* | 24 | /* |
24 | * Theory of operations. | 25 | * Theory of operations. |
@@ -54,15 +55,11 @@ | |||
54 | * 1. Nodes may appear in the tree only with the pool lock held. | 55 | * 1. Nodes may appear in the tree only with the pool lock held. |
55 | * 2. Nodes may disappear from the tree only with the pool lock held | 56 | * 2. Nodes may disappear from the tree only with the pool lock held |
56 | * AND reference count being 0. | 57 | * AND reference count being 0. |
57 | * 3. Nodes appears and disappears from unused node list only under | 58 | * 3. Global variable peer_total is modified under the pool lock. |
58 | * "inet_peer_unused_lock". | 59 | * 4. struct inet_peer fields modification: |
59 | * 4. Global variable peer_total is modified under the pool lock. | ||
60 | * 5. struct inet_peer fields modification: | ||
61 | * avl_left, avl_right, avl_parent, avl_height: pool lock | 60 | * avl_left, avl_right, avl_parent, avl_height: pool lock |
62 | * unused: unused node list lock | ||
63 | * refcnt: atomically against modifications on other CPU; | 61 | * refcnt: atomically against modifications on other CPU; |
64 | * usually under some other lock to prevent node disappearing | 62 | * usually under some other lock to prevent node disappearing |
65 | * dtime: unused node list lock | ||
66 | * daddr: unchangeable | 63 | * daddr: unchangeable |
67 | * ip_id_count: atomic value (no lock needed) | 64 | * ip_id_count: atomic value (no lock needed) |
68 | */ | 65 | */ |
@@ -104,19 +101,6 @@ int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries m | |||
104 | * aggressively at this stage */ | 101 | * aggressively at this stage */ |
105 | int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */ | 102 | int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */ |
106 | int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */ | 103 | int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */ |
107 | int inet_peer_gc_mintime __read_mostly = 10 * HZ; | ||
108 | int inet_peer_gc_maxtime __read_mostly = 120 * HZ; | ||
109 | |||
110 | static struct { | ||
111 | struct list_head list; | ||
112 | spinlock_t lock; | ||
113 | } unused_peers = { | ||
114 | .list = LIST_HEAD_INIT(unused_peers.list), | ||
115 | .lock = __SPIN_LOCK_UNLOCKED(unused_peers.lock), | ||
116 | }; | ||
117 | |||
118 | static void peer_check_expire(unsigned long dummy); | ||
119 | static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0); | ||
120 | 104 | ||
121 | 105 | ||
122 | /* Called from ip_output.c:ip_init */ | 106 | /* Called from ip_output.c:ip_init */ |
@@ -142,21 +126,6 @@ void __init inet_initpeers(void) | |||
142 | 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, | 126 | 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, |
143 | NULL); | 127 | NULL); |
144 | 128 | ||
145 | /* All the timers, started at system startup tend | ||
146 | to synchronize. Perturb it a bit. | ||
147 | */ | ||
148 | peer_periodic_timer.expires = jiffies | ||
149 | + net_random() % inet_peer_gc_maxtime | ||
150 | + inet_peer_gc_maxtime; | ||
151 | add_timer(&peer_periodic_timer); | ||
152 | } | ||
153 | |||
154 | /* Called with or without local BH being disabled. */ | ||
155 | static void unlink_from_unused(struct inet_peer *p) | ||
156 | { | ||
157 | spin_lock_bh(&unused_peers.lock); | ||
158 | list_del_init(&p->unused); | ||
159 | spin_unlock_bh(&unused_peers.lock); | ||
160 | } | 129 | } |
161 | 130 | ||
162 | static int addr_compare(const struct inetpeer_addr *a, | 131 | static int addr_compare(const struct inetpeer_addr *a, |
@@ -203,20 +172,6 @@ static int addr_compare(const struct inetpeer_addr *a, | |||
203 | u; \ | 172 | u; \ |
204 | }) | 173 | }) |
205 | 174 | ||
206 | static bool atomic_add_unless_return(atomic_t *ptr, int a, int u, int *newv) | ||
207 | { | ||
208 | int cur, old = atomic_read(ptr); | ||
209 | |||
210 | while (old != u) { | ||
211 | *newv = old + a; | ||
212 | cur = atomic_cmpxchg(ptr, old, *newv); | ||
213 | if (cur == old) | ||
214 | return true; | ||
215 | old = cur; | ||
216 | } | ||
217 | return false; | ||
218 | } | ||
219 | |||
220 | /* | 175 | /* |
221 | * Called with rcu_read_lock() | 176 | * Called with rcu_read_lock() |
222 | * Because we hold no lock against a writer, its quite possible we fall | 177 | * Because we hold no lock against a writer, its quite possible we fall |
@@ -225,8 +180,7 @@ static bool atomic_add_unless_return(atomic_t *ptr, int a, int u, int *newv) | |||
225 | * We exit from this function if number of links exceeds PEER_MAXDEPTH | 180 | * We exit from this function if number of links exceeds PEER_MAXDEPTH |
226 | */ | 181 | */ |
227 | static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr, | 182 | static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr, |
228 | struct inet_peer_base *base, | 183 | struct inet_peer_base *base) |
229 | int *newrefcnt) | ||
230 | { | 184 | { |
231 | struct inet_peer *u = rcu_dereference(base->root); | 185 | struct inet_peer *u = rcu_dereference(base->root); |
232 | int count = 0; | 186 | int count = 0; |
@@ -235,11 +189,9 @@ static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr, | |||
235 | int cmp = addr_compare(daddr, &u->daddr); | 189 | int cmp = addr_compare(daddr, &u->daddr); |
236 | if (cmp == 0) { | 190 | if (cmp == 0) { |
237 | /* Before taking a reference, check if this entry was | 191 | /* Before taking a reference, check if this entry was |
238 | * deleted, unlink_from_pool() sets refcnt=-1 to make | 192 | * deleted (refcnt=-1) |
239 | * distinction between an unused entry (refcnt=0) and | ||
240 | * a freed one. | ||
241 | */ | 193 | */ |
242 | if (!atomic_add_unless_return(&u->refcnt, 1, -1, newrefcnt)) | 194 | if (!atomic_add_unless(&u->refcnt, 1, -1)) |
243 | u = NULL; | 195 | u = NULL; |
244 | return u; | 196 | return u; |
245 | } | 197 | } |
@@ -366,137 +318,99 @@ static void inetpeer_free_rcu(struct rcu_head *head) | |||
366 | kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu)); | 318 | kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu)); |
367 | } | 319 | } |
368 | 320 | ||
369 | /* May be called with local BH enabled. */ | ||
370 | static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base, | 321 | static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base, |
371 | struct inet_peer __rcu **stack[PEER_MAXDEPTH]) | 322 | struct inet_peer __rcu **stack[PEER_MAXDEPTH]) |
372 | { | 323 | { |
373 | int do_free; | 324 | struct inet_peer __rcu ***stackptr, ***delp; |
374 | 325 | ||
375 | do_free = 0; | 326 | if (lookup(&p->daddr, stack, base) != p) |
376 | 327 | BUG(); | |
377 | write_seqlock_bh(&base->lock); | 328 | delp = stackptr - 1; /* *delp[0] == p */ |
378 | /* Check the reference counter. It was artificially incremented by 1 | 329 | if (p->avl_left == peer_avl_empty_rcu) { |
379 | * in cleanup() function to prevent sudden disappearing. If we can | 330 | *delp[0] = p->avl_right; |
380 | * atomically (because of lockless readers) take this last reference, | 331 | --stackptr; |
381 | * it's safe to remove the node and free it later. | 332 | } else { |
382 | * We use refcnt=-1 to alert lockless readers this entry is deleted. | 333 | /* look for a node to insert instead of p */ |
383 | */ | 334 | struct inet_peer *t; |
384 | if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) { | 335 | t = lookup_rightempty(p, base); |
385 | struct inet_peer __rcu ***stackptr, ***delp; | 336 | BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t); |
386 | if (lookup(&p->daddr, stack, base) != p) | 337 | **--stackptr = t->avl_left; |
387 | BUG(); | 338 | /* t is removed, t->daddr > x->daddr for any |
388 | delp = stackptr - 1; /* *delp[0] == p */ | 339 | * x in p->avl_left subtree. |
389 | if (p->avl_left == peer_avl_empty_rcu) { | 340 | * Put t in the old place of p. */ |
390 | *delp[0] = p->avl_right; | 341 | RCU_INIT_POINTER(*delp[0], t); |
391 | --stackptr; | 342 | t->avl_left = p->avl_left; |
392 | } else { | 343 | t->avl_right = p->avl_right; |
393 | /* look for a node to insert instead of p */ | 344 | t->avl_height = p->avl_height; |
394 | struct inet_peer *t; | 345 | BUG_ON(delp[1] != &p->avl_left); |
395 | t = lookup_rightempty(p, base); | 346 | delp[1] = &t->avl_left; /* was &p->avl_left */ |
396 | BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t); | ||
397 | **--stackptr = t->avl_left; | ||
398 | /* t is removed, t->daddr > x->daddr for any | ||
399 | * x in p->avl_left subtree. | ||
400 | * Put t in the old place of p. */ | ||
401 | RCU_INIT_POINTER(*delp[0], t); | ||
402 | t->avl_left = p->avl_left; | ||
403 | t->avl_right = p->avl_right; | ||
404 | t->avl_height = p->avl_height; | ||
405 | BUG_ON(delp[1] != &p->avl_left); | ||
406 | delp[1] = &t->avl_left; /* was &p->avl_left */ | ||
407 | } | ||
408 | peer_avl_rebalance(stack, stackptr, base); | ||
409 | base->total--; | ||
410 | do_free = 1; | ||
411 | } | 347 | } |
412 | write_sequnlock_bh(&base->lock); | 348 | peer_avl_rebalance(stack, stackptr, base); |
413 | 349 | base->total--; | |
414 | if (do_free) | 350 | call_rcu(&p->rcu, inetpeer_free_rcu); |
415 | call_rcu(&p->rcu, inetpeer_free_rcu); | ||
416 | else | ||
417 | /* The node is used again. Decrease the reference counter | ||
418 | * back. The loop "cleanup -> unlink_from_unused | ||
419 | * -> unlink_from_pool -> putpeer -> link_to_unused | ||
420 | * -> cleanup (for the same node)" | ||
421 | * doesn't really exist because the entry will have a | ||
422 | * recent deletion time and will not be cleaned again soon. | ||
423 | */ | ||
424 | inet_putpeer(p); | ||
425 | } | 351 | } |
426 | 352 | ||
427 | static struct inet_peer_base *family_to_base(int family) | 353 | static struct inet_peer_base *family_to_base(int family) |
428 | { | 354 | { |
429 | return (family == AF_INET ? &v4_peers : &v6_peers); | 355 | return family == AF_INET ? &v4_peers : &v6_peers; |
430 | } | ||
431 | |||
432 | static struct inet_peer_base *peer_to_base(struct inet_peer *p) | ||
433 | { | ||
434 | return family_to_base(p->daddr.family); | ||
435 | } | 356 | } |
436 | 357 | ||
437 | /* May be called with local BH enabled. */ | 358 | /* perform garbage collect on all items stacked during a lookup */ |
438 | static int cleanup_once(unsigned long ttl, struct inet_peer __rcu **stack[PEER_MAXDEPTH]) | 359 | static int inet_peer_gc(struct inet_peer_base *base, |
360 | struct inet_peer __rcu **stack[PEER_MAXDEPTH], | ||
361 | struct inet_peer __rcu ***stackptr) | ||
439 | { | 362 | { |
440 | struct inet_peer *p = NULL; | 363 | struct inet_peer *p, *gchead = NULL; |
441 | 364 | __u32 delta, ttl; | |
442 | /* Remove the first entry from the list of unused nodes. */ | 365 | int cnt = 0; |
443 | spin_lock_bh(&unused_peers.lock); | ||
444 | if (!list_empty(&unused_peers.list)) { | ||
445 | __u32 delta; | ||
446 | |||
447 | p = list_first_entry(&unused_peers.list, struct inet_peer, unused); | ||
448 | delta = (__u32)jiffies - p->dtime; | ||
449 | 366 | ||
450 | if (delta < ttl) { | 367 | if (base->total >= inet_peer_threshold) |
451 | /* Do not prune fresh entries. */ | 368 | ttl = 0; /* be aggressive */ |
452 | spin_unlock_bh(&unused_peers.lock); | 369 | else |
453 | return -1; | 370 | ttl = inet_peer_maxttl |
371 | - (inet_peer_maxttl - inet_peer_minttl) / HZ * | ||
372 | base->total / inet_peer_threshold * HZ; | ||
373 | stackptr--; /* last stack slot is peer_avl_empty */ | ||
374 | while (stackptr > stack) { | ||
375 | stackptr--; | ||
376 | p = rcu_deref_locked(**stackptr, base); | ||
377 | if (atomic_read(&p->refcnt) == 0) { | ||
378 | smp_rmb(); | ||
379 | delta = (__u32)jiffies - p->dtime; | ||
380 | if (delta >= ttl && | ||
381 | atomic_cmpxchg(&p->refcnt, 0, -1) == 0) { | ||
382 | p->gc_next = gchead; | ||
383 | gchead = p; | ||
384 | } | ||
454 | } | 385 | } |
455 | |||
456 | list_del_init(&p->unused); | ||
457 | |||
458 | /* Grab an extra reference to prevent node disappearing | ||
459 | * before unlink_from_pool() call. */ | ||
460 | atomic_inc(&p->refcnt); | ||
461 | } | 386 | } |
462 | spin_unlock_bh(&unused_peers.lock); | 387 | while ((p = gchead) != NULL) { |
463 | 388 | gchead = p->gc_next; | |
464 | if (p == NULL) | 389 | cnt++; |
465 | /* It means that the total number of USED entries has | 390 | unlink_from_pool(p, base, stack); |
466 | * grown over inet_peer_threshold. It shouldn't really | 391 | } |
467 | * happen because of entry limits in route cache. */ | 392 | return cnt; |
468 | return -1; | ||
469 | |||
470 | unlink_from_pool(p, peer_to_base(p), stack); | ||
471 | return 0; | ||
472 | } | 393 | } |
473 | 394 | ||
474 | /* Called with or without local BH being disabled. */ | 395 | struct inet_peer *inet_getpeer(const struct inetpeer_addr *daddr, int create) |
475 | struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) | ||
476 | { | 396 | { |
477 | struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; | 397 | struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; |
478 | struct inet_peer_base *base = family_to_base(daddr->family); | 398 | struct inet_peer_base *base = family_to_base(daddr->family); |
479 | struct inet_peer *p; | 399 | struct inet_peer *p; |
480 | unsigned int sequence; | 400 | unsigned int sequence; |
481 | int invalidated, newrefcnt = 0; | 401 | int invalidated, gccnt = 0; |
482 | 402 | ||
483 | /* Look up for the address quickly, lockless. | 403 | /* Attempt a lockless lookup first. |
484 | * Because of a concurrent writer, we might not find an existing entry. | 404 | * Because of a concurrent writer, we might not find an existing entry. |
485 | */ | 405 | */ |
486 | rcu_read_lock(); | 406 | rcu_read_lock(); |
487 | sequence = read_seqbegin(&base->lock); | 407 | sequence = read_seqbegin(&base->lock); |
488 | p = lookup_rcu(daddr, base, &newrefcnt); | 408 | p = lookup_rcu(daddr, base); |
489 | invalidated = read_seqretry(&base->lock, sequence); | 409 | invalidated = read_seqretry(&base->lock, sequence); |
490 | rcu_read_unlock(); | 410 | rcu_read_unlock(); |
491 | 411 | ||
492 | if (p) { | 412 | if (p) |
493 | found: /* The existing node has been found. | ||
494 | * Remove the entry from unused list if it was there. | ||
495 | */ | ||
496 | if (newrefcnt == 1) | ||
497 | unlink_from_unused(p); | ||
498 | return p; | 413 | return p; |
499 | } | ||
500 | 414 | ||
501 | /* If no writer did a change during our lookup, we can return early. */ | 415 | /* If no writer did a change during our lookup, we can return early. */ |
502 | if (!create && !invalidated) | 416 | if (!create && !invalidated) |
@@ -506,18 +420,27 @@ found: /* The existing node has been found. | |||
506 | * At least, nodes should be hot in our cache. | 420 | * At least, nodes should be hot in our cache. |
507 | */ | 421 | */ |
508 | write_seqlock_bh(&base->lock); | 422 | write_seqlock_bh(&base->lock); |
423 | relookup: | ||
509 | p = lookup(daddr, stack, base); | 424 | p = lookup(daddr, stack, base); |
510 | if (p != peer_avl_empty) { | 425 | if (p != peer_avl_empty) { |
511 | newrefcnt = atomic_inc_return(&p->refcnt); | 426 | atomic_inc(&p->refcnt); |
512 | write_sequnlock_bh(&base->lock); | 427 | write_sequnlock_bh(&base->lock); |
513 | goto found; | 428 | return p; |
429 | } | ||
430 | if (!gccnt) { | ||
431 | gccnt = inet_peer_gc(base, stack, stackptr); | ||
432 | if (gccnt && create) | ||
433 | goto relookup; | ||
514 | } | 434 | } |
515 | p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL; | 435 | p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL; |
516 | if (p) { | 436 | if (p) { |
517 | p->daddr = *daddr; | 437 | p->daddr = *daddr; |
518 | atomic_set(&p->refcnt, 1); | 438 | atomic_set(&p->refcnt, 1); |
519 | atomic_set(&p->rid, 0); | 439 | atomic_set(&p->rid, 0); |
520 | atomic_set(&p->ip_id_count, secure_ip_id(daddr->addr.a4)); | 440 | atomic_set(&p->ip_id_count, |
441 | (daddr->family == AF_INET) ? | ||
442 | secure_ip_id(daddr->addr.a4) : | ||
443 | secure_ipv6_id(daddr->addr.a6)); | ||
521 | p->tcp_ts_stamp = 0; | 444 | p->tcp_ts_stamp = 0; |
522 | p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; | 445 | p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; |
523 | p->rate_tokens = 0; | 446 | p->rate_tokens = 0; |
@@ -525,7 +448,6 @@ found: /* The existing node has been found. | |||
525 | p->pmtu_expires = 0; | 448 | p->pmtu_expires = 0; |
526 | p->pmtu_orig = 0; | 449 | p->pmtu_orig = 0; |
527 | memset(&p->redirect_learned, 0, sizeof(p->redirect_learned)); | 450 | memset(&p->redirect_learned, 0, sizeof(p->redirect_learned)); |
528 | INIT_LIST_HEAD(&p->unused); | ||
529 | 451 | ||
530 | 452 | ||
531 | /* Link the node. */ | 453 | /* Link the node. */ |
@@ -534,63 +456,15 @@ found: /* The existing node has been found. | |||
534 | } | 456 | } |
535 | write_sequnlock_bh(&base->lock); | 457 | write_sequnlock_bh(&base->lock); |
536 | 458 | ||
537 | if (base->total >= inet_peer_threshold) | ||
538 | /* Remove one less-recently-used entry. */ | ||
539 | cleanup_once(0, stack); | ||
540 | |||
541 | return p; | 459 | return p; |
542 | } | 460 | } |
543 | |||
544 | static int compute_total(void) | ||
545 | { | ||
546 | return v4_peers.total + v6_peers.total; | ||
547 | } | ||
548 | EXPORT_SYMBOL_GPL(inet_getpeer); | 461 | EXPORT_SYMBOL_GPL(inet_getpeer); |
549 | 462 | ||
550 | /* Called with local BH disabled. */ | ||
551 | static void peer_check_expire(unsigned long dummy) | ||
552 | { | ||
553 | unsigned long now = jiffies; | ||
554 | int ttl, total; | ||
555 | struct inet_peer __rcu **stack[PEER_MAXDEPTH]; | ||
556 | |||
557 | total = compute_total(); | ||
558 | if (total >= inet_peer_threshold) | ||
559 | ttl = inet_peer_minttl; | ||
560 | else | ||
561 | ttl = inet_peer_maxttl | ||
562 | - (inet_peer_maxttl - inet_peer_minttl) / HZ * | ||
563 | total / inet_peer_threshold * HZ; | ||
564 | while (!cleanup_once(ttl, stack)) { | ||
565 | if (jiffies != now) | ||
566 | break; | ||
567 | } | ||
568 | |||
569 | /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime | ||
570 | * interval depending on the total number of entries (more entries, | ||
571 | * less interval). */ | ||
572 | total = compute_total(); | ||
573 | if (total >= inet_peer_threshold) | ||
574 | peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime; | ||
575 | else | ||
576 | peer_periodic_timer.expires = jiffies | ||
577 | + inet_peer_gc_maxtime | ||
578 | - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ * | ||
579 | total / inet_peer_threshold * HZ; | ||
580 | add_timer(&peer_periodic_timer); | ||
581 | } | ||
582 | |||
583 | void inet_putpeer(struct inet_peer *p) | 463 | void inet_putpeer(struct inet_peer *p) |
584 | { | 464 | { |
585 | local_bh_disable(); | 465 | p->dtime = (__u32)jiffies; |
586 | 466 | smp_mb__before_atomic_dec(); | |
587 | if (atomic_dec_and_lock(&p->refcnt, &unused_peers.lock)) { | 467 | atomic_dec(&p->refcnt); |
588 | list_add_tail(&p->unused, &unused_peers.list); | ||
589 | p->dtime = (__u32)jiffies; | ||
590 | spin_unlock(&unused_peers.lock); | ||
591 | } | ||
592 | |||
593 | local_bh_enable(); | ||
594 | } | 468 | } |
595 | EXPORT_SYMBOL_GPL(inet_putpeer); | 469 | EXPORT_SYMBOL_GPL(inet_putpeer); |
596 | 470 | ||
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 0ad6035f636..0e0ab98abc6 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c | |||
@@ -261,8 +261,9 @@ static void ip_expire(unsigned long arg) | |||
261 | * Only an end host needs to send an ICMP | 261 | * Only an end host needs to send an ICMP |
262 | * "Fragment Reassembly Timeout" message, per RFC792. | 262 | * "Fragment Reassembly Timeout" message, per RFC792. |
263 | */ | 263 | */ |
264 | if (qp->user == IP_DEFRAG_CONNTRACK_IN && | 264 | if (qp->user == IP_DEFRAG_AF_PACKET || |
265 | skb_rtable(head)->rt_type != RTN_LOCAL) | 265 | (qp->user == IP_DEFRAG_CONNTRACK_IN && |
266 | skb_rtable(head)->rt_type != RTN_LOCAL)) | ||
266 | goto out_rcu_unlock; | 267 | goto out_rcu_unlock; |
267 | 268 | ||
268 | 269 | ||
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 8871067560d..d7bb94c4834 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c | |||
@@ -731,9 +731,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
731 | } | 731 | } |
732 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 732 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
733 | else if (skb->protocol == htons(ETH_P_IPV6)) { | 733 | else if (skb->protocol == htons(ETH_P_IPV6)) { |
734 | struct neighbour *neigh = dst_get_neighbour(skb_dst(skb)); | ||
734 | const struct in6_addr *addr6; | 735 | const struct in6_addr *addr6; |
735 | int addr_type; | 736 | int addr_type; |
736 | struct neighbour *neigh = skb_dst(skb)->neighbour; | ||
737 | 737 | ||
738 | if (neigh == NULL) | 738 | if (neigh == NULL) |
739 | goto tx_error; | 739 | goto tx_error; |
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index c8f48efc5fd..073a9b01c40 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c | |||
@@ -165,7 +165,7 @@ int ip_call_ra_chain(struct sk_buff *skb) | |||
165 | (!sk->sk_bound_dev_if || | 165 | (!sk->sk_bound_dev_if || |
166 | sk->sk_bound_dev_if == dev->ifindex) && | 166 | sk->sk_bound_dev_if == dev->ifindex) && |
167 | net_eq(sock_net(sk), dev_net(dev))) { | 167 | net_eq(sock_net(sk), dev_net(dev))) { |
168 | if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { | 168 | if (ip_is_fragment(ip_hdr(skb))) { |
169 | if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) | 169 | if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) |
170 | return 1; | 170 | return 1; |
171 | } | 171 | } |
@@ -256,7 +256,7 @@ int ip_local_deliver(struct sk_buff *skb) | |||
256 | * Reassemble IP fragments. | 256 | * Reassemble IP fragments. |
257 | */ | 257 | */ |
258 | 258 | ||
259 | if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { | 259 | if (ip_is_fragment(ip_hdr(skb))) { |
260 | if (ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER)) | 260 | if (ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER)) |
261 | return 0; | 261 | return 0; |
262 | } | 262 | } |
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index ec93335901d..05d20cca9d6 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c | |||
@@ -640,6 +640,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) | |||
640 | } | 640 | } |
641 | if (srrptr <= srrspace) { | 641 | if (srrptr <= srrspace) { |
642 | opt->srr_is_hit = 1; | 642 | opt->srr_is_hit = 1; |
643 | iph->daddr = nexthop; | ||
643 | opt->is_changed = 1; | 644 | opt->is_changed = 1; |
644 | } | 645 | } |
645 | return 0; | 646 | return 0; |
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 84f26e8e6c6..8c6563361ab 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -122,6 +122,7 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb) | |||
122 | newskb->pkt_type = PACKET_LOOPBACK; | 122 | newskb->pkt_type = PACKET_LOOPBACK; |
123 | newskb->ip_summed = CHECKSUM_UNNECESSARY; | 123 | newskb->ip_summed = CHECKSUM_UNNECESSARY; |
124 | WARN_ON(!skb_dst(newskb)); | 124 | WARN_ON(!skb_dst(newskb)); |
125 | skb_dst_force(newskb); | ||
125 | netif_rx_ni(newskb); | 126 | netif_rx_ni(newskb); |
126 | return 0; | 127 | return 0; |
127 | } | 128 | } |
@@ -182,6 +183,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) | |||
182 | struct rtable *rt = (struct rtable *)dst; | 183 | struct rtable *rt = (struct rtable *)dst; |
183 | struct net_device *dev = dst->dev; | 184 | struct net_device *dev = dst->dev; |
184 | unsigned int hh_len = LL_RESERVED_SPACE(dev); | 185 | unsigned int hh_len = LL_RESERVED_SPACE(dev); |
186 | struct neighbour *neigh; | ||
185 | 187 | ||
186 | if (rt->rt_type == RTN_MULTICAST) { | 188 | if (rt->rt_type == RTN_MULTICAST) { |
187 | IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len); | 189 | IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len); |
@@ -203,10 +205,15 @@ static inline int ip_finish_output2(struct sk_buff *skb) | |||
203 | skb = skb2; | 205 | skb = skb2; |
204 | } | 206 | } |
205 | 207 | ||
206 | if (dst->hh) | 208 | rcu_read_lock(); |
207 | return neigh_hh_output(dst->hh, skb); | 209 | neigh = dst_get_neighbour(dst); |
208 | else if (dst->neighbour) | 210 | if (neigh) { |
209 | return dst->neighbour->output(skb); | 211 | int res = neigh_output(neigh, skb); |
212 | |||
213 | rcu_read_unlock(); | ||
214 | return res; | ||
215 | } | ||
216 | rcu_read_unlock(); | ||
210 | 217 | ||
211 | if (net_ratelimit()) | 218 | if (net_ratelimit()) |
212 | printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n"); | 219 | printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n"); |
@@ -489,7 +496,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | |||
489 | 496 | ||
490 | if (first_len - hlen > mtu || | 497 | if (first_len - hlen > mtu || |
491 | ((first_len - hlen) & 7) || | 498 | ((first_len - hlen) & 7) || |
492 | (iph->frag_off & htons(IP_MF|IP_OFFSET)) || | 499 | ip_is_fragment(iph) || |
493 | skb_cloned(skb)) | 500 | skb_cloned(skb)) |
494 | goto slow_path; | 501 | goto slow_path; |
495 | 502 | ||
@@ -734,7 +741,7 @@ static inline int ip_ufo_append_data(struct sock *sk, | |||
734 | int getfrag(void *from, char *to, int offset, int len, | 741 | int getfrag(void *from, char *to, int offset, int len, |
735 | int odd, struct sk_buff *skb), | 742 | int odd, struct sk_buff *skb), |
736 | void *from, int length, int hh_len, int fragheaderlen, | 743 | void *from, int length, int hh_len, int fragheaderlen, |
737 | int transhdrlen, int mtu, unsigned int flags) | 744 | int transhdrlen, int maxfraglen, unsigned int flags) |
738 | { | 745 | { |
739 | struct sk_buff *skb; | 746 | struct sk_buff *skb; |
740 | int err; | 747 | int err; |
@@ -767,7 +774,7 @@ static inline int ip_ufo_append_data(struct sock *sk, | |||
767 | skb->csum = 0; | 774 | skb->csum = 0; |
768 | 775 | ||
769 | /* specify the length of each IP datagram fragment */ | 776 | /* specify the length of each IP datagram fragment */ |
770 | skb_shinfo(skb)->gso_size = mtu - fragheaderlen; | 777 | skb_shinfo(skb)->gso_size = maxfraglen - fragheaderlen; |
771 | skb_shinfo(skb)->gso_type = SKB_GSO_UDP; | 778 | skb_shinfo(skb)->gso_type = SKB_GSO_UDP; |
772 | __skb_queue_tail(queue, skb); | 779 | __skb_queue_tail(queue, skb); |
773 | } | 780 | } |
@@ -831,7 +838,7 @@ static int __ip_append_data(struct sock *sk, | |||
831 | (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len) { | 838 | (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len) { |
832 | err = ip_ufo_append_data(sk, queue, getfrag, from, length, | 839 | err = ip_ufo_append_data(sk, queue, getfrag, from, length, |
833 | hh_len, fragheaderlen, transhdrlen, | 840 | hh_len, fragheaderlen, transhdrlen, |
834 | mtu, flags); | 841 | maxfraglen, flags); |
835 | if (err) | 842 | if (err) |
836 | goto error; | 843 | goto error; |
837 | return 0; | 844 | return 0; |
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index ab0c9efd1ef..8905e92f896 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c | |||
@@ -1067,7 +1067,7 @@ EXPORT_SYMBOL(compat_ip_setsockopt); | |||
1067 | */ | 1067 | */ |
1068 | 1068 | ||
1069 | static int do_ip_getsockopt(struct sock *sk, int level, int optname, | 1069 | static int do_ip_getsockopt(struct sock *sk, int level, int optname, |
1070 | char __user *optval, int __user *optlen) | 1070 | char __user *optval, int __user *optlen, unsigned flags) |
1071 | { | 1071 | { |
1072 | struct inet_sock *inet = inet_sk(sk); | 1072 | struct inet_sock *inet = inet_sk(sk); |
1073 | int val; | 1073 | int val; |
@@ -1240,7 +1240,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, | |||
1240 | 1240 | ||
1241 | msg.msg_control = optval; | 1241 | msg.msg_control = optval; |
1242 | msg.msg_controllen = len; | 1242 | msg.msg_controllen = len; |
1243 | msg.msg_flags = 0; | 1243 | msg.msg_flags = flags; |
1244 | 1244 | ||
1245 | if (inet->cmsg_flags & IP_CMSG_PKTINFO) { | 1245 | if (inet->cmsg_flags & IP_CMSG_PKTINFO) { |
1246 | struct in_pktinfo info; | 1246 | struct in_pktinfo info; |
@@ -1294,7 +1294,7 @@ int ip_getsockopt(struct sock *sk, int level, | |||
1294 | { | 1294 | { |
1295 | int err; | 1295 | int err; |
1296 | 1296 | ||
1297 | err = do_ip_getsockopt(sk, level, optname, optval, optlen); | 1297 | err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0); |
1298 | #ifdef CONFIG_NETFILTER | 1298 | #ifdef CONFIG_NETFILTER |
1299 | /* we need to exclude all possible ENOPROTOOPTs except default case */ | 1299 | /* we need to exclude all possible ENOPROTOOPTs except default case */ |
1300 | if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && | 1300 | if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && |
@@ -1327,7 +1327,8 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname, | |||
1327 | return compat_mc_getsockopt(sk, level, optname, optval, optlen, | 1327 | return compat_mc_getsockopt(sk, level, optname, optval, optlen, |
1328 | ip_getsockopt); | 1328 | ip_getsockopt); |
1329 | 1329 | ||
1330 | err = do_ip_getsockopt(sk, level, optname, optval, optlen); | 1330 | err = do_ip_getsockopt(sk, level, optname, optval, optlen, |
1331 | MSG_CMSG_COMPAT); | ||
1331 | 1332 | ||
1332 | #ifdef CONFIG_NETFILTER | 1333 | #ifdef CONFIG_NETFILTER |
1333 | /* we need to exclude all possible ENOPROTOOPTs except default case */ | 1334 | /* we need to exclude all possible ENOPROTOOPTs except default case */ |
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index ab7e5542c1c..004bb74b41c 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c | |||
@@ -252,6 +252,10 @@ static int __init ic_open_devs(void) | |||
252 | } | 252 | } |
253 | } | 253 | } |
254 | 254 | ||
255 | /* no point in waiting if we could not bring up at least one device */ | ||
256 | if (!ic_first_dev) | ||
257 | goto have_carrier; | ||
258 | |||
255 | /* wait for a carrier on at least one device */ | 259 | /* wait for a carrier on at least one device */ |
256 | start = jiffies; | 260 | start = jiffies; |
257 | while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) { | 261 | while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) { |
@@ -861,41 +865,44 @@ static void __init ic_do_bootp_ext(u8 *ext) | |||
861 | #endif | 865 | #endif |
862 | 866 | ||
863 | switch (*ext++) { | 867 | switch (*ext++) { |
864 | case 1: /* Subnet mask */ | 868 | case 1: /* Subnet mask */ |
865 | if (ic_netmask == NONE) | 869 | if (ic_netmask == NONE) |
866 | memcpy(&ic_netmask, ext+1, 4); | 870 | memcpy(&ic_netmask, ext+1, 4); |
867 | break; | 871 | break; |
868 | case 3: /* Default gateway */ | 872 | case 3: /* Default gateway */ |
869 | if (ic_gateway == NONE) | 873 | if (ic_gateway == NONE) |
870 | memcpy(&ic_gateway, ext+1, 4); | 874 | memcpy(&ic_gateway, ext+1, 4); |
871 | break; | 875 | break; |
872 | case 6: /* DNS server */ | 876 | case 6: /* DNS server */ |
873 | servers= *ext/4; | 877 | servers= *ext/4; |
874 | if (servers > CONF_NAMESERVERS_MAX) | 878 | if (servers > CONF_NAMESERVERS_MAX) |
875 | servers = CONF_NAMESERVERS_MAX; | 879 | servers = CONF_NAMESERVERS_MAX; |
876 | for (i = 0; i < servers; i++) { | 880 | for (i = 0; i < servers; i++) { |
877 | if (ic_nameservers[i] == NONE) | 881 | if (ic_nameservers[i] == NONE) |
878 | memcpy(&ic_nameservers[i], ext+1+4*i, 4); | 882 | memcpy(&ic_nameservers[i], ext+1+4*i, 4); |
879 | } | 883 | } |
880 | break; | 884 | break; |
881 | case 12: /* Host name */ | 885 | case 12: /* Host name */ |
882 | ic_bootp_string(utsname()->nodename, ext+1, *ext, __NEW_UTS_LEN); | 886 | ic_bootp_string(utsname()->nodename, ext+1, *ext, |
883 | ic_host_name_set = 1; | 887 | __NEW_UTS_LEN); |
884 | break; | 888 | ic_host_name_set = 1; |
885 | case 15: /* Domain name (DNS) */ | 889 | break; |
886 | ic_bootp_string(ic_domain, ext+1, *ext, sizeof(ic_domain)); | 890 | case 15: /* Domain name (DNS) */ |
887 | break; | 891 | ic_bootp_string(ic_domain, ext+1, *ext, sizeof(ic_domain)); |
888 | case 17: /* Root path */ | 892 | break; |
889 | if (!root_server_path[0]) | 893 | case 17: /* Root path */ |
890 | ic_bootp_string(root_server_path, ext+1, *ext, sizeof(root_server_path)); | 894 | if (!root_server_path[0]) |
891 | break; | 895 | ic_bootp_string(root_server_path, ext+1, *ext, |
892 | case 26: /* Interface MTU */ | 896 | sizeof(root_server_path)); |
893 | memcpy(&mtu, ext+1, sizeof(mtu)); | 897 | break; |
894 | ic_dev_mtu = ntohs(mtu); | 898 | case 26: /* Interface MTU */ |
895 | break; | 899 | memcpy(&mtu, ext+1, sizeof(mtu)); |
896 | case 40: /* NIS Domain name (_not_ DNS) */ | 900 | ic_dev_mtu = ntohs(mtu); |
897 | ic_bootp_string(utsname()->domainname, ext+1, *ext, __NEW_UTS_LEN); | 901 | break; |
898 | break; | 902 | case 40: /* NIS Domain name (_not_ DNS) */ |
903 | ic_bootp_string(utsname()->domainname, ext+1, *ext, | ||
904 | __NEW_UTS_LEN); | ||
905 | break; | ||
899 | } | 906 | } |
900 | } | 907 | } |
901 | 908 | ||
@@ -932,7 +939,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str | |||
932 | goto drop; | 939 | goto drop; |
933 | 940 | ||
934 | /* Fragments are not supported */ | 941 | /* Fragments are not supported */ |
935 | if (h->frag_off & htons(IP_OFFSET | IP_MF)) { | 942 | if (ip_is_fragment(h)) { |
936 | if (net_ratelimit()) | 943 | if (net_ratelimit()) |
937 | printk(KERN_ERR "DHCP/BOOTP: Ignoring fragmented " | 944 | printk(KERN_ERR "DHCP/BOOTP: Ignoring fragmented " |
938 | "reply.\n"); | 945 | "reply.\n"); |
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 378b20b7ca6..6f06f7f39ea 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c | |||
@@ -285,6 +285,8 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net, | |||
285 | if (register_netdevice(dev) < 0) | 285 | if (register_netdevice(dev) < 0) |
286 | goto failed_free; | 286 | goto failed_free; |
287 | 287 | ||
288 | strcpy(nt->parms.name, dev->name); | ||
289 | |||
288 | dev_hold(dev); | 290 | dev_hold(dev); |
289 | ipip_tunnel_link(ipn, nt); | 291 | ipip_tunnel_link(ipn, nt); |
290 | return nt; | 292 | return nt; |
@@ -759,7 +761,6 @@ static int ipip_tunnel_init(struct net_device *dev) | |||
759 | struct ip_tunnel *tunnel = netdev_priv(dev); | 761 | struct ip_tunnel *tunnel = netdev_priv(dev); |
760 | 762 | ||
761 | tunnel->dev = dev; | 763 | tunnel->dev = dev; |
762 | strcpy(tunnel->parms.name, dev->name); | ||
763 | 764 | ||
764 | memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); | 765 | memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); |
765 | memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); | 766 | memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); |
@@ -825,6 +826,7 @@ static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head) | |||
825 | static int __net_init ipip_init_net(struct net *net) | 826 | static int __net_init ipip_init_net(struct net *net) |
826 | { | 827 | { |
827 | struct ipip_net *ipn = net_generic(net, ipip_net_id); | 828 | struct ipip_net *ipn = net_generic(net, ipip_net_id); |
829 | struct ip_tunnel *t; | ||
828 | int err; | 830 | int err; |
829 | 831 | ||
830 | ipn->tunnels[0] = ipn->tunnels_wc; | 832 | ipn->tunnels[0] = ipn->tunnels_wc; |
@@ -848,6 +850,9 @@ static int __net_init ipip_init_net(struct net *net) | |||
848 | if ((err = register_netdev(ipn->fb_tunnel_dev))) | 850 | if ((err = register_netdev(ipn->fb_tunnel_dev))) |
849 | goto err_reg_dev; | 851 | goto err_reg_dev; |
850 | 852 | ||
853 | t = netdev_priv(ipn->fb_tunnel_dev); | ||
854 | |||
855 | strcpy(t->parms.name, ipn->fb_tunnel_dev->name); | ||
851 | return 0; | 856 | return 0; |
852 | 857 | ||
853 | err_reg_dev: | 858 | err_reg_dev: |
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 30a7763c400..58e87915797 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c | |||
@@ -1796,7 +1796,7 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) | |||
1796 | struct flowi4 fl4 = { | 1796 | struct flowi4 fl4 = { |
1797 | .daddr = iph->daddr, | 1797 | .daddr = iph->daddr, |
1798 | .saddr = iph->saddr, | 1798 | .saddr = iph->saddr, |
1799 | .flowi4_tos = iph->tos, | 1799 | .flowi4_tos = RT_TOS(iph->tos), |
1800 | .flowi4_oif = rt->rt_oif, | 1800 | .flowi4_oif = rt->rt_oif, |
1801 | .flowi4_iif = rt->rt_iif, | 1801 | .flowi4_iif = rt->rt_iif, |
1802 | .flowi4_mark = rt->rt_mark, | 1802 | .flowi4_mark = rt->rt_mark, |
@@ -2544,7 +2544,8 @@ int __init ip_mr_init(void) | |||
2544 | goto add_proto_fail; | 2544 | goto add_proto_fail; |
2545 | } | 2545 | } |
2546 | #endif | 2546 | #endif |
2547 | rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute); | 2547 | rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, |
2548 | NULL, ipmr_rtm_dumproute, NULL); | ||
2548 | return 0; | 2549 | return 0; |
2549 | 2550 | ||
2550 | #ifdef CONFIG_IP_PIMSM_V2 | 2551 | #ifdef CONFIG_IP_PIMSM_V2 |
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 2e97e3ec1eb..929b27bdeb7 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c | |||
@@ -18,17 +18,15 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) | |||
18 | struct rtable *rt; | 18 | struct rtable *rt; |
19 | struct flowi4 fl4 = {}; | 19 | struct flowi4 fl4 = {}; |
20 | __be32 saddr = iph->saddr; | 20 | __be32 saddr = iph->saddr; |
21 | __u8 flags = 0; | 21 | __u8 flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; |
22 | unsigned int hh_len; | 22 | unsigned int hh_len; |
23 | 23 | ||
24 | if (!skb->sk && addr_type != RTN_LOCAL) { | 24 | if (addr_type == RTN_UNSPEC) |
25 | if (addr_type == RTN_UNSPEC) | 25 | addr_type = inet_addr_type(net, saddr); |
26 | addr_type = inet_addr_type(net, saddr); | 26 | if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST) |
27 | if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST) | 27 | flags |= FLOWI_FLAG_ANYSRC; |
28 | flags |= FLOWI_FLAG_ANYSRC; | 28 | else |
29 | else | 29 | saddr = 0; |
30 | saddr = 0; | ||
31 | } | ||
32 | 30 | ||
33 | /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause | 31 | /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause |
34 | * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook. | 32 | * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook. |
@@ -38,7 +36,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) | |||
38 | fl4.flowi4_tos = RT_TOS(iph->tos); | 36 | fl4.flowi4_tos = RT_TOS(iph->tos); |
39 | fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; | 37 | fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; |
40 | fl4.flowi4_mark = skb->mark; | 38 | fl4.flowi4_mark = skb->mark; |
41 | fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : flags; | 39 | fl4.flowi4_flags = flags; |
42 | rt = ip_route_output_key(net, &fl4); | 40 | rt = ip_route_output_key(net, &fl4); |
43 | if (IS_ERR(rt)) | 41 | if (IS_ERR(rt)) |
44 | return -1; | 42 | return -1; |
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 1dfc18a03fd..73b4e91a87e 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig | |||
@@ -113,6 +113,18 @@ config IP_NF_TARGET_REJECT | |||
113 | 113 | ||
114 | To compile it as a module, choose M here. If unsure, say N. | 114 | To compile it as a module, choose M here. If unsure, say N. |
115 | 115 | ||
116 | config IP_NF_TARGET_REJECT_SKERR | ||
117 | bool "Force socket error when rejecting with icmp*" | ||
118 | depends on IP_NF_TARGET_REJECT | ||
119 | default n | ||
120 | help | ||
121 | This option enables turning a "--reject-with icmp*" into a matching | ||
122 | socket error also. | ||
123 | The REJECT target normally allows sending an ICMP message. But it | ||
124 | leaves the local socket unaware of any ingress rejects. | ||
125 | |||
126 | If unsure, say N. | ||
127 | |||
116 | config IP_NF_TARGET_LOG | 128 | config IP_NF_TARGET_LOG |
117 | tristate "LOG target support" | 129 | tristate "LOG target support" |
118 | default m if NETFILTER_ADVANCED=n | 130 | default m if NETFILTER_ADVANCED=n |
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 5c9b9d96391..e59aabd0eae 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c | |||
@@ -218,6 +218,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) | |||
218 | return skb; | 218 | return skb; |
219 | 219 | ||
220 | nlmsg_failure: | 220 | nlmsg_failure: |
221 | kfree_skb(skb); | ||
221 | *errp = -EINVAL; | 222 | *errp = -EINVAL; |
222 | printk(KERN_ERR "ip_queue: error creating packet message\n"); | 223 | printk(KERN_ERR "ip_queue: error creating packet message\n"); |
223 | return NULL; | 224 | return NULL; |
@@ -313,7 +314,7 @@ ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len) | |||
313 | { | 314 | { |
314 | struct nf_queue_entry *entry; | 315 | struct nf_queue_entry *entry; |
315 | 316 | ||
316 | if (vmsg->value > NF_MAX_VERDICT) | 317 | if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN) |
317 | return -EINVAL; | 318 | return -EINVAL; |
318 | 319 | ||
319 | entry = ipq_find_dequeue_entry(vmsg->id); | 320 | entry = ipq_find_dequeue_entry(vmsg->id); |
@@ -358,12 +359,9 @@ ipq_receive_peer(struct ipq_peer_msg *pmsg, | |||
358 | break; | 359 | break; |
359 | 360 | ||
360 | case IPQM_VERDICT: | 361 | case IPQM_VERDICT: |
361 | if (pmsg->msg.verdict.value > NF_MAX_VERDICT) | 362 | status = ipq_set_verdict(&pmsg->msg.verdict, |
362 | status = -EINVAL; | 363 | len - sizeof(*pmsg)); |
363 | else | 364 | break; |
364 | status = ipq_set_verdict(&pmsg->msg.verdict, | ||
365 | len - sizeof(*pmsg)); | ||
366 | break; | ||
367 | default: | 365 | default: |
368 | status = -EINVAL; | 366 | status = -EINVAL; |
369 | } | 367 | } |
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 5c9e97c7901..db8d22db425 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c | |||
@@ -317,19 +317,19 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) | |||
317 | hash = clusterip_hashfn(skb, cipinfo->config); | 317 | hash = clusterip_hashfn(skb, cipinfo->config); |
318 | 318 | ||
319 | switch (ctinfo) { | 319 | switch (ctinfo) { |
320 | case IP_CT_NEW: | 320 | case IP_CT_NEW: |
321 | ct->mark = hash; | 321 | ct->mark = hash; |
322 | break; | 322 | break; |
323 | case IP_CT_RELATED: | 323 | case IP_CT_RELATED: |
324 | case IP_CT_RELATED_REPLY: | 324 | case IP_CT_RELATED_REPLY: |
325 | /* FIXME: we don't handle expectations at the | 325 | /* FIXME: we don't handle expectations at the moment. |
326 | * moment. they can arrive on a different node than | 326 | * They can arrive on a different node than |
327 | * the master connection (e.g. FTP passive mode) */ | 327 | * the master connection (e.g. FTP passive mode) */ |
328 | case IP_CT_ESTABLISHED: | 328 | case IP_CT_ESTABLISHED: |
329 | case IP_CT_ESTABLISHED_REPLY: | 329 | case IP_CT_ESTABLISHED_REPLY: |
330 | break; | 330 | break; |
331 | default: | 331 | default: /* Prevent gcc warnings */ |
332 | break; | 332 | break; |
333 | } | 333 | } |
334 | 334 | ||
335 | #ifdef DEBUG | 335 | #ifdef DEBUG |
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 51f13f8ec72..9dd754c7f2b 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c | |||
@@ -128,6 +128,14 @@ static void send_reset(struct sk_buff *oldskb, int hook) | |||
128 | static inline void send_unreach(struct sk_buff *skb_in, int code) | 128 | static inline void send_unreach(struct sk_buff *skb_in, int code) |
129 | { | 129 | { |
130 | icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); | 130 | icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); |
131 | #ifdef CONFIG_IP_NF_TARGET_REJECT_SKERR | ||
132 | if (skb_in->sk) { | ||
133 | skb_in->sk->sk_err = icmp_err_convert[code].errno; | ||
134 | skb_in->sk->sk_error_report(skb_in->sk); | ||
135 | pr_debug("ipt_REJECT: sk_err=%d for skb=%p sk=%p\n", | ||
136 | skb_in->sk->sk_err, skb_in, skb_in->sk); | ||
137 | } | ||
138 | #endif | ||
131 | } | 139 | } |
132 | 140 | ||
133 | static unsigned int | 141 | static unsigned int |
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index f3a9b42b16c..9bb1b8a37a2 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c | |||
@@ -82,7 +82,7 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, | |||
82 | #endif | 82 | #endif |
83 | #endif | 83 | #endif |
84 | /* Gather fragments. */ | 84 | /* Gather fragments. */ |
85 | if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { | 85 | if (ip_is_fragment(ip_hdr(skb))) { |
86 | enum ip_defrag_users user = nf_ct_defrag_user(hooknum, skb); | 86 | enum ip_defrag_users user = nf_ct_defrag_user(hooknum, skb); |
87 | if (nf_ct_ipv4_gather_frags(skb, user)) | 87 | if (nf_ct_ipv4_gather_frags(skb, user)) |
88 | return NF_STOLEN; | 88 | return NF_STOLEN; |
diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c index 3e61faf23a9..f52d41ea069 100644 --- a/net/ipv4/netfilter/nf_nat_proto_common.c +++ b/net/ipv4/netfilter/nf_nat_proto_common.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/ip.h> | 12 | #include <linux/ip.h> |
13 | 13 | ||
14 | #include <linux/netfilter.h> | 14 | #include <linux/netfilter.h> |
15 | #include <net/secure_seq.h> | ||
15 | #include <net/netfilter/nf_nat.h> | 16 | #include <net/netfilter/nf_nat.h> |
16 | #include <net/netfilter/nf_nat_core.h> | 17 | #include <net/netfilter/nf_nat_core.h> |
17 | #include <net/netfilter/nf_nat_rule.h> | 18 | #include <net/netfilter/nf_nat_rule.h> |
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 8812a02078a..076b7c8c4aa 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c | |||
@@ -719,117 +719,115 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx, | |||
719 | 719 | ||
720 | l = 0; | 720 | l = 0; |
721 | switch (type) { | 721 | switch (type) { |
722 | case SNMP_INTEGER: | 722 | case SNMP_INTEGER: |
723 | len = sizeof(long); | 723 | len = sizeof(long); |
724 | if (!asn1_long_decode(ctx, end, &l)) { | 724 | if (!asn1_long_decode(ctx, end, &l)) { |
725 | kfree(id); | 725 | kfree(id); |
726 | return 0; | 726 | return 0; |
727 | } | 727 | } |
728 | *obj = kmalloc(sizeof(struct snmp_object) + len, | 728 | *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); |
729 | GFP_ATOMIC); | 729 | if (*obj == NULL) { |
730 | if (*obj == NULL) { | 730 | kfree(id); |
731 | kfree(id); | 731 | if (net_ratelimit()) |
732 | if (net_ratelimit()) | 732 | pr_notice("OOM in bsalg (%d)\n", __LINE__); |
733 | pr_notice("OOM in bsalg (%d)\n", __LINE__); | 733 | return 0; |
734 | return 0; | 734 | } |
735 | } | 735 | (*obj)->syntax.l[0] = l; |
736 | (*obj)->syntax.l[0] = l; | 736 | break; |
737 | break; | 737 | case SNMP_OCTETSTR: |
738 | case SNMP_OCTETSTR: | 738 | case SNMP_OPAQUE: |
739 | case SNMP_OPAQUE: | 739 | if (!asn1_octets_decode(ctx, end, &p, &len)) { |
740 | if (!asn1_octets_decode(ctx, end, &p, &len)) { | 740 | kfree(id); |
741 | kfree(id); | 741 | return 0; |
742 | return 0; | 742 | } |
743 | } | 743 | *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); |
744 | *obj = kmalloc(sizeof(struct snmp_object) + len, | 744 | if (*obj == NULL) { |
745 | GFP_ATOMIC); | ||
746 | if (*obj == NULL) { | ||
747 | kfree(p); | ||
748 | kfree(id); | ||
749 | if (net_ratelimit()) | ||
750 | pr_notice("OOM in bsalg (%d)\n", __LINE__); | ||
751 | return 0; | ||
752 | } | ||
753 | memcpy((*obj)->syntax.c, p, len); | ||
754 | kfree(p); | 745 | kfree(p); |
755 | break; | 746 | kfree(id); |
756 | case SNMP_NULL: | 747 | if (net_ratelimit()) |
757 | case SNMP_NOSUCHOBJECT: | 748 | pr_notice("OOM in bsalg (%d)\n", __LINE__); |
758 | case SNMP_NOSUCHINSTANCE: | 749 | return 0; |
759 | case SNMP_ENDOFMIBVIEW: | 750 | } |
760 | len = 0; | 751 | memcpy((*obj)->syntax.c, p, len); |
761 | *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC); | 752 | kfree(p); |
762 | if (*obj == NULL) { | 753 | break; |
763 | kfree(id); | 754 | case SNMP_NULL: |
764 | if (net_ratelimit()) | 755 | case SNMP_NOSUCHOBJECT: |
765 | pr_notice("OOM in bsalg (%d)\n", __LINE__); | 756 | case SNMP_NOSUCHINSTANCE: |
766 | return 0; | 757 | case SNMP_ENDOFMIBVIEW: |
767 | } | 758 | len = 0; |
768 | if (!asn1_null_decode(ctx, end)) { | 759 | *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC); |
769 | kfree(id); | 760 | if (*obj == NULL) { |
770 | kfree(*obj); | 761 | kfree(id); |
771 | *obj = NULL; | 762 | if (net_ratelimit()) |
772 | return 0; | 763 | pr_notice("OOM in bsalg (%d)\n", __LINE__); |
773 | } | 764 | return 0; |
774 | break; | 765 | } |
775 | case SNMP_OBJECTID: | 766 | if (!asn1_null_decode(ctx, end)) { |
776 | if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) { | 767 | kfree(id); |
777 | kfree(id); | 768 | kfree(*obj); |
778 | return 0; | 769 | *obj = NULL; |
779 | } | 770 | return 0; |
780 | len *= sizeof(unsigned long); | 771 | } |
781 | *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); | 772 | break; |
782 | if (*obj == NULL) { | 773 | case SNMP_OBJECTID: |
783 | kfree(lp); | 774 | if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) { |
784 | kfree(id); | 775 | kfree(id); |
785 | if (net_ratelimit()) | 776 | return 0; |
786 | pr_notice("OOM in bsalg (%d)\n", __LINE__); | 777 | } |
787 | return 0; | 778 | len *= sizeof(unsigned long); |
788 | } | 779 | *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); |
789 | memcpy((*obj)->syntax.ul, lp, len); | 780 | if (*obj == NULL) { |
790 | kfree(lp); | 781 | kfree(lp); |
791 | break; | 782 | kfree(id); |
792 | case SNMP_IPADDR: | 783 | if (net_ratelimit()) |
793 | if (!asn1_octets_decode(ctx, end, &p, &len)) { | 784 | pr_notice("OOM in bsalg (%d)\n", __LINE__); |
794 | kfree(id); | 785 | return 0; |
795 | return 0; | 786 | } |
796 | } | 787 | memcpy((*obj)->syntax.ul, lp, len); |
797 | if (len != 4) { | 788 | kfree(lp); |
798 | kfree(p); | 789 | break; |
799 | kfree(id); | 790 | case SNMP_IPADDR: |
800 | return 0; | 791 | if (!asn1_octets_decode(ctx, end, &p, &len)) { |
801 | } | 792 | kfree(id); |
802 | *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); | 793 | return 0; |
803 | if (*obj == NULL) { | 794 | } |
804 | kfree(p); | 795 | if (len != 4) { |
805 | kfree(id); | ||
806 | if (net_ratelimit()) | ||
807 | pr_notice("OOM in bsalg (%d)\n", __LINE__); | ||
808 | return 0; | ||
809 | } | ||
810 | memcpy((*obj)->syntax.uc, p, len); | ||
811 | kfree(p); | 796 | kfree(p); |
812 | break; | ||
813 | case SNMP_COUNTER: | ||
814 | case SNMP_GAUGE: | ||
815 | case SNMP_TIMETICKS: | ||
816 | len = sizeof(unsigned long); | ||
817 | if (!asn1_ulong_decode(ctx, end, &ul)) { | ||
818 | kfree(id); | ||
819 | return 0; | ||
820 | } | ||
821 | *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); | ||
822 | if (*obj == NULL) { | ||
823 | kfree(id); | ||
824 | if (net_ratelimit()) | ||
825 | pr_notice("OOM in bsalg (%d)\n", __LINE__); | ||
826 | return 0; | ||
827 | } | ||
828 | (*obj)->syntax.ul[0] = ul; | ||
829 | break; | ||
830 | default: | ||
831 | kfree(id); | 797 | kfree(id); |
832 | return 0; | 798 | return 0; |
799 | } | ||
800 | *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); | ||
801 | if (*obj == NULL) { | ||
802 | kfree(p); | ||
803 | kfree(id); | ||
804 | if (net_ratelimit()) | ||
805 | pr_notice("OOM in bsalg (%d)\n", __LINE__); | ||
806 | return 0; | ||
807 | } | ||
808 | memcpy((*obj)->syntax.uc, p, len); | ||
809 | kfree(p); | ||
810 | break; | ||
811 | case SNMP_COUNTER: | ||
812 | case SNMP_GAUGE: | ||
813 | case SNMP_TIMETICKS: | ||
814 | len = sizeof(unsigned long); | ||
815 | if (!asn1_ulong_decode(ctx, end, &ul)) { | ||
816 | kfree(id); | ||
817 | return 0; | ||
818 | } | ||
819 | *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); | ||
820 | if (*obj == NULL) { | ||
821 | kfree(id); | ||
822 | if (net_ratelimit()) | ||
823 | pr_notice("OOM in bsalg (%d)\n", __LINE__); | ||
824 | return 0; | ||
825 | } | ||
826 | (*obj)->syntax.ul[0] = ul; | ||
827 | break; | ||
828 | default: | ||
829 | kfree(id); | ||
830 | return 0; | ||
833 | } | 831 | } |
834 | 832 | ||
835 | (*obj)->syntax_len = len; | 833 | (*obj)->syntax_len = len; |
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 483b76d042d..a6e606e8482 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c | |||
@@ -88,7 +88,7 @@ nf_nat_fn(unsigned int hooknum, | |||
88 | 88 | ||
89 | /* We never see fragments: conntrack defrags on pre-routing | 89 | /* We never see fragments: conntrack defrags on pre-routing |
90 | and local-out, and nf_nat_out protects post-routing. */ | 90 | and local-out, and nf_nat_out protects post-routing. */ |
91 | NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))); | 91 | NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb))); |
92 | 92 | ||
93 | ct = nf_ct_get(skb, &ctinfo); | 93 | ct = nf_ct_get(skb, &ctinfo); |
94 | /* Can't track? It's not due to stress, or conntrack would | 94 | /* Can't track? It's not due to stress, or conntrack would |
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index b14ec7d03b6..4bfad5da94f 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c | |||
@@ -254,6 +254,8 @@ static const struct snmp_mib snmp4_net_list[] = { | |||
254 | SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP), | 254 | SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP), |
255 | SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER), | 255 | SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER), |
256 | SNMP_MIB_ITEM("TCPTimeWaitOverflow", LINUX_MIB_TCPTIMEWAITOVERFLOW), | 256 | SNMP_MIB_ITEM("TCPTimeWaitOverflow", LINUX_MIB_TCPTIMEWAITOVERFLOW), |
257 | SNMP_MIB_ITEM("TCPReqQFullDoCookies", LINUX_MIB_TCPREQQFULLDOCOOKIES), | ||
258 | SNMP_MIB_ITEM("TCPReqQFullDrop", LINUX_MIB_TCPREQQFULLDROP), | ||
257 | SNMP_MIB_SENTINEL | 259 | SNMP_MIB_SENTINEL |
258 | }; | 260 | }; |
259 | 261 | ||
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index c9893d43242..61714bd5292 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
@@ -38,7 +38,7 @@ | |||
38 | */ | 38 | */ |
39 | 39 | ||
40 | #include <linux/types.h> | 40 | #include <linux/types.h> |
41 | #include <asm/atomic.h> | 41 | #include <linux/atomic.h> |
42 | #include <asm/byteorder.h> | 42 | #include <asm/byteorder.h> |
43 | #include <asm/current.h> | 43 | #include <asm/current.h> |
44 | #include <asm/uaccess.h> | 44 | #include <asm/uaccess.h> |
@@ -563,7 +563,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
563 | flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, | 563 | flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, |
564 | RT_SCOPE_UNIVERSE, | 564 | RT_SCOPE_UNIVERSE, |
565 | inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, | 565 | inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, |
566 | FLOWI_FLAG_CAN_SLEEP, daddr, saddr, 0, 0); | 566 | inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP, |
567 | daddr, saddr, 0, 0); | ||
567 | 568 | ||
568 | if (!inet->hdrincl) { | 569 | if (!inet->hdrincl) { |
569 | err = raw_probe_proto_opt(&fl4, msg); | 570 | err = raw_probe_proto_opt(&fl4, msg); |
@@ -825,28 +826,28 @@ static int compat_raw_getsockopt(struct sock *sk, int level, int optname, | |||
825 | static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg) | 826 | static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg) |
826 | { | 827 | { |
827 | switch (cmd) { | 828 | switch (cmd) { |
828 | case SIOCOUTQ: { | 829 | case SIOCOUTQ: { |
829 | int amount = sk_wmem_alloc_get(sk); | 830 | int amount = sk_wmem_alloc_get(sk); |
830 | 831 | ||
831 | return put_user(amount, (int __user *)arg); | 832 | return put_user(amount, (int __user *)arg); |
832 | } | 833 | } |
833 | case SIOCINQ: { | 834 | case SIOCINQ: { |
834 | struct sk_buff *skb; | 835 | struct sk_buff *skb; |
835 | int amount = 0; | 836 | int amount = 0; |
836 | 837 | ||
837 | spin_lock_bh(&sk->sk_receive_queue.lock); | 838 | spin_lock_bh(&sk->sk_receive_queue.lock); |
838 | skb = skb_peek(&sk->sk_receive_queue); | 839 | skb = skb_peek(&sk->sk_receive_queue); |
839 | if (skb != NULL) | 840 | if (skb != NULL) |
840 | amount = skb->len; | 841 | amount = skb->len; |
841 | spin_unlock_bh(&sk->sk_receive_queue.lock); | 842 | spin_unlock_bh(&sk->sk_receive_queue.lock); |
842 | return put_user(amount, (int __user *)arg); | 843 | return put_user(amount, (int __user *)arg); |
843 | } | 844 | } |
844 | 845 | ||
845 | default: | 846 | default: |
846 | #ifdef CONFIG_IP_MROUTE | 847 | #ifdef CONFIG_IP_MROUTE |
847 | return ipmr_ioctl(sk, cmd, (void __user *)arg); | 848 | return ipmr_ioctl(sk, cmd, (void __user *)arg); |
848 | #else | 849 | #else |
849 | return -ENOIOCTLCMD; | 850 | return -ENOIOCTLCMD; |
850 | #endif | 851 | #endif |
851 | } | 852 | } |
852 | } | 853 | } |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index aa13ef10511..b5638545deb 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -91,6 +91,7 @@ | |||
91 | #include <linux/rcupdate.h> | 91 | #include <linux/rcupdate.h> |
92 | #include <linux/times.h> | 92 | #include <linux/times.h> |
93 | #include <linux/slab.h> | 93 | #include <linux/slab.h> |
94 | #include <linux/prefetch.h> | ||
94 | #include <net/dst.h> | 95 | #include <net/dst.h> |
95 | #include <net/net_namespace.h> | 96 | #include <net/net_namespace.h> |
96 | #include <net/protocol.h> | 97 | #include <net/protocol.h> |
@@ -108,9 +109,11 @@ | |||
108 | #ifdef CONFIG_SYSCTL | 109 | #ifdef CONFIG_SYSCTL |
109 | #include <linux/sysctl.h> | 110 | #include <linux/sysctl.h> |
110 | #endif | 111 | #endif |
112 | #include <net/atmclip.h> | ||
113 | #include <net/secure_seq.h> | ||
111 | 114 | ||
112 | #define RT_FL_TOS(oldflp4) \ | 115 | #define RT_FL_TOS(oldflp4) \ |
113 | ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) | 116 | ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)) |
114 | 117 | ||
115 | #define IP_MAX_MTU 0xFFF0 | 118 | #define IP_MAX_MTU 0xFFF0 |
116 | 119 | ||
@@ -130,6 +133,10 @@ static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; | |||
130 | static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; | 133 | static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; |
131 | static int ip_rt_min_advmss __read_mostly = 256; | 134 | static int ip_rt_min_advmss __read_mostly = 256; |
132 | static int rt_chain_length_max __read_mostly = 20; | 135 | static int rt_chain_length_max __read_mostly = 20; |
136 | static int redirect_genid; | ||
137 | |||
138 | static struct delayed_work expires_work; | ||
139 | static unsigned long expires_ljiffies; | ||
133 | 140 | ||
134 | /* | 141 | /* |
135 | * Interface to generic destination cache. | 142 | * Interface to generic destination cache. |
@@ -184,6 +191,8 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) | |||
184 | return p; | 191 | return p; |
185 | } | 192 | } |
186 | 193 | ||
194 | static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr); | ||
195 | |||
187 | static struct dst_ops ipv4_dst_ops = { | 196 | static struct dst_ops ipv4_dst_ops = { |
188 | .family = AF_INET, | 197 | .family = AF_INET, |
189 | .protocol = cpu_to_be16(ETH_P_IP), | 198 | .protocol = cpu_to_be16(ETH_P_IP), |
@@ -198,6 +207,7 @@ static struct dst_ops ipv4_dst_ops = { | |||
198 | .link_failure = ipv4_link_failure, | 207 | .link_failure = ipv4_link_failure, |
199 | .update_pmtu = ip_rt_update_pmtu, | 208 | .update_pmtu = ip_rt_update_pmtu, |
200 | .local_out = __ip_local_out, | 209 | .local_out = __ip_local_out, |
210 | .neigh_lookup = ipv4_neigh_lookup, | ||
201 | }; | 211 | }; |
202 | 212 | ||
203 | #define ECN_OR_COST(class) TC_PRIO_##class | 213 | #define ECN_OR_COST(class) TC_PRIO_##class |
@@ -411,7 +421,13 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) | |||
411 | "HHUptod\tSpecDst"); | 421 | "HHUptod\tSpecDst"); |
412 | else { | 422 | else { |
413 | struct rtable *r = v; | 423 | struct rtable *r = v; |
414 | int len; | 424 | struct neighbour *n; |
425 | int len, HHUptod; | ||
426 | |||
427 | rcu_read_lock(); | ||
428 | n = dst_get_neighbour(&r->dst); | ||
429 | HHUptod = (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0; | ||
430 | rcu_read_unlock(); | ||
415 | 431 | ||
416 | seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" | 432 | seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" |
417 | "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", | 433 | "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", |
@@ -425,9 +441,8 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) | |||
425 | (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + | 441 | (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + |
426 | dst_metric(&r->dst, RTAX_RTTVAR)), | 442 | dst_metric(&r->dst, RTAX_RTTVAR)), |
427 | r->rt_key_tos, | 443 | r->rt_key_tos, |
428 | r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1, | 444 | -1, |
429 | r->dst.hh ? (r->dst.hh->hh_output == | 445 | HHUptod, |
430 | dev_queue_xmit) : 0, | ||
431 | r->rt_spec_dst, &len); | 446 | r->rt_spec_dst, &len); |
432 | 447 | ||
433 | seq_printf(seq, "%*s\n", 127 - len, ""); | 448 | seq_printf(seq, "%*s\n", 127 - len, ""); |
@@ -716,7 +731,7 @@ static inline bool compare_hash_inputs(const struct rtable *rt1, | |||
716 | { | 731 | { |
717 | return ((((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) | | 732 | return ((((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) | |
718 | ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | | 733 | ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | |
719 | (rt1->rt_iif ^ rt2->rt_iif)) == 0); | 734 | (rt1->rt_route_iif ^ rt2->rt_route_iif)) == 0); |
720 | } | 735 | } |
721 | 736 | ||
722 | static inline int compare_keys(struct rtable *rt1, struct rtable *rt2) | 737 | static inline int compare_keys(struct rtable *rt1, struct rtable *rt2) |
@@ -725,8 +740,8 @@ static inline int compare_keys(struct rtable *rt1, struct rtable *rt2) | |||
725 | ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | | 740 | ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | |
726 | (rt1->rt_mark ^ rt2->rt_mark) | | 741 | (rt1->rt_mark ^ rt2->rt_mark) | |
727 | (rt1->rt_key_tos ^ rt2->rt_key_tos) | | 742 | (rt1->rt_key_tos ^ rt2->rt_key_tos) | |
728 | (rt1->rt_oif ^ rt2->rt_oif) | | 743 | (rt1->rt_route_iif ^ rt2->rt_route_iif) | |
729 | (rt1->rt_iif ^ rt2->rt_iif)) == 0; | 744 | (rt1->rt_oif ^ rt2->rt_oif)) == 0; |
730 | } | 745 | } |
731 | 746 | ||
732 | static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) | 747 | static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) |
@@ -820,6 +835,97 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth) | |||
820 | return ONE; | 835 | return ONE; |
821 | } | 836 | } |
822 | 837 | ||
838 | static void rt_check_expire(void) | ||
839 | { | ||
840 | static unsigned int rover; | ||
841 | unsigned int i = rover, goal; | ||
842 | struct rtable *rth; | ||
843 | struct rtable __rcu **rthp; | ||
844 | unsigned long samples = 0; | ||
845 | unsigned long sum = 0, sum2 = 0; | ||
846 | unsigned long delta; | ||
847 | u64 mult; | ||
848 | |||
849 | delta = jiffies - expires_ljiffies; | ||
850 | expires_ljiffies = jiffies; | ||
851 | mult = ((u64)delta) << rt_hash_log; | ||
852 | if (ip_rt_gc_timeout > 1) | ||
853 | do_div(mult, ip_rt_gc_timeout); | ||
854 | goal = (unsigned int)mult; | ||
855 | if (goal > rt_hash_mask) | ||
856 | goal = rt_hash_mask + 1; | ||
857 | for (; goal > 0; goal--) { | ||
858 | unsigned long tmo = ip_rt_gc_timeout; | ||
859 | unsigned long length; | ||
860 | |||
861 | i = (i + 1) & rt_hash_mask; | ||
862 | rthp = &rt_hash_table[i].chain; | ||
863 | |||
864 | if (need_resched()) | ||
865 | cond_resched(); | ||
866 | |||
867 | samples++; | ||
868 | |||
869 | if (rcu_dereference_raw(*rthp) == NULL) | ||
870 | continue; | ||
871 | length = 0; | ||
872 | spin_lock_bh(rt_hash_lock_addr(i)); | ||
873 | while ((rth = rcu_dereference_protected(*rthp, | ||
874 | lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) { | ||
875 | prefetch(rth->dst.rt_next); | ||
876 | if (rt_is_expired(rth)) { | ||
877 | *rthp = rth->dst.rt_next; | ||
878 | rt_free(rth); | ||
879 | continue; | ||
880 | } | ||
881 | if (rth->dst.expires) { | ||
882 | /* Entry is expired even if it is in use */ | ||
883 | if (time_before_eq(jiffies, rth->dst.expires)) { | ||
884 | nofree: | ||
885 | tmo >>= 1; | ||
886 | rthp = &rth->dst.rt_next; | ||
887 | /* | ||
888 | * We only count entries on | ||
889 | * a chain with equal hash inputs once | ||
890 | * so that entries for different QOS | ||
891 | * levels, and other non-hash input | ||
892 | * attributes don't unfairly skew | ||
893 | * the length computation | ||
894 | */ | ||
895 | length += has_noalias(rt_hash_table[i].chain, rth); | ||
896 | continue; | ||
897 | } | ||
898 | } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) | ||
899 | goto nofree; | ||
900 | |||
901 | /* Cleanup aged off entries. */ | ||
902 | *rthp = rth->dst.rt_next; | ||
903 | rt_free(rth); | ||
904 | } | ||
905 | spin_unlock_bh(rt_hash_lock_addr(i)); | ||
906 | sum += length; | ||
907 | sum2 += length*length; | ||
908 | } | ||
909 | if (samples) { | ||
910 | unsigned long avg = sum / samples; | ||
911 | unsigned long sd = int_sqrt(sum2 / samples - avg*avg); | ||
912 | rt_chain_length_max = max_t(unsigned long, | ||
913 | ip_rt_gc_elasticity, | ||
914 | (avg + 4*sd) >> FRACT_BITS); | ||
915 | } | ||
916 | rover = i; | ||
917 | } | ||
918 | |||
919 | /* | ||
920 | * rt_worker_func() is run in process context. | ||
921 | * we call rt_check_expire() to scan part of the hash table | ||
922 | */ | ||
923 | static void rt_worker_func(struct work_struct *work) | ||
924 | { | ||
925 | rt_check_expire(); | ||
926 | schedule_delayed_work(&expires_work, ip_rt_gc_interval); | ||
927 | } | ||
928 | |||
823 | /* | 929 | /* |
824 | * Perturbation of rt_genid by a small quantity [1..256] | 930 | * Perturbation of rt_genid by a small quantity [1..256] |
825 | * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() | 931 | * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() |
@@ -832,6 +938,7 @@ static void rt_cache_invalidate(struct net *net) | |||
832 | 938 | ||
833 | get_random_bytes(&shuffle, sizeof(shuffle)); | 939 | get_random_bytes(&shuffle, sizeof(shuffle)); |
834 | atomic_add(shuffle + 1U, &net->ipv4.rt_genid); | 940 | atomic_add(shuffle + 1U, &net->ipv4.rt_genid); |
941 | redirect_genid++; | ||
835 | } | 942 | } |
836 | 943 | ||
837 | /* | 944 | /* |
@@ -1006,6 +1113,37 @@ static int slow_chain_length(const struct rtable *head) | |||
1006 | return length >> FRACT_BITS; | 1113 | return length >> FRACT_BITS; |
1007 | } | 1114 | } |
1008 | 1115 | ||
1116 | static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr) | ||
1117 | { | ||
1118 | struct neigh_table *tbl = &arp_tbl; | ||
1119 | static const __be32 inaddr_any = 0; | ||
1120 | struct net_device *dev = dst->dev; | ||
1121 | const __be32 *pkey = daddr; | ||
1122 | struct neighbour *n; | ||
1123 | |||
1124 | #if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) | ||
1125 | if (dev->type == ARPHRD_ATM) | ||
1126 | tbl = clip_tbl_hook; | ||
1127 | #endif | ||
1128 | if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) | ||
1129 | pkey = &inaddr_any; | ||
1130 | |||
1131 | n = __ipv4_neigh_lookup(tbl, dev, *(__force u32 *)pkey); | ||
1132 | if (n) | ||
1133 | return n; | ||
1134 | return neigh_create(tbl, pkey, dev); | ||
1135 | } | ||
1136 | |||
1137 | static int rt_bind_neighbour(struct rtable *rt) | ||
1138 | { | ||
1139 | struct neighbour *n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway); | ||
1140 | if (IS_ERR(n)) | ||
1141 | return PTR_ERR(n); | ||
1142 | dst_set_neighbour(&rt->dst, n); | ||
1143 | |||
1144 | return 0; | ||
1145 | } | ||
1146 | |||
1009 | static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt, | 1147 | static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt, |
1010 | struct sk_buff *skb, int ifindex) | 1148 | struct sk_buff *skb, int ifindex) |
1011 | { | 1149 | { |
@@ -1042,7 +1180,7 @@ restart: | |||
1042 | 1180 | ||
1043 | rt->dst.flags |= DST_NOCACHE; | 1181 | rt->dst.flags |= DST_NOCACHE; |
1044 | if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) { | 1182 | if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) { |
1045 | int err = arp_bind_neighbour(&rt->dst); | 1183 | int err = rt_bind_neighbour(rt); |
1046 | if (err) { | 1184 | if (err) { |
1047 | if (net_ratelimit()) | 1185 | if (net_ratelimit()) |
1048 | printk(KERN_WARNING | 1186 | printk(KERN_WARNING |
@@ -1138,7 +1276,7 @@ restart: | |||
1138 | route or unicast forwarding path. | 1276 | route or unicast forwarding path. |
1139 | */ | 1277 | */ |
1140 | if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) { | 1278 | if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) { |
1141 | int err = arp_bind_neighbour(&rt->dst); | 1279 | int err = rt_bind_neighbour(rt); |
1142 | if (err) { | 1280 | if (err) { |
1143 | spin_unlock_bh(rt_hash_lock_addr(hash)); | 1281 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
1144 | 1282 | ||
@@ -1229,7 +1367,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) | |||
1229 | { | 1367 | { |
1230 | struct rtable *rt = (struct rtable *) dst; | 1368 | struct rtable *rt = (struct rtable *) dst; |
1231 | 1369 | ||
1232 | if (rt) { | 1370 | if (rt && !(rt->dst.flags & DST_NOPEER)) { |
1233 | if (rt->peer == NULL) | 1371 | if (rt->peer == NULL) |
1234 | rt_bind_peer(rt, rt->rt_dst, 1); | 1372 | rt_bind_peer(rt, rt->rt_dst, 1); |
1235 | 1373 | ||
@@ -1240,7 +1378,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) | |||
1240 | iph->id = htons(inet_getid(rt->peer, more)); | 1378 | iph->id = htons(inet_getid(rt->peer, more)); |
1241 | return; | 1379 | return; |
1242 | } | 1380 | } |
1243 | } else | 1381 | } else if (!rt) |
1244 | printk(KERN_DEBUG "rt_bind_peer(0) @%p\n", | 1382 | printk(KERN_DEBUG "rt_bind_peer(0) @%p\n", |
1245 | __builtin_return_address(0)); | 1383 | __builtin_return_address(0)); |
1246 | 1384 | ||
@@ -1268,11 +1406,40 @@ static void rt_del(unsigned hash, struct rtable *rt) | |||
1268 | spin_unlock_bh(rt_hash_lock_addr(hash)); | 1406 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
1269 | } | 1407 | } |
1270 | 1408 | ||
1409 | static void check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) | ||
1410 | { | ||
1411 | struct rtable *rt = (struct rtable *) dst; | ||
1412 | __be32 orig_gw = rt->rt_gateway; | ||
1413 | struct neighbour *n, *old_n; | ||
1414 | |||
1415 | dst_confirm(&rt->dst); | ||
1416 | |||
1417 | rt->rt_gateway = peer->redirect_learned.a4; | ||
1418 | |||
1419 | n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway); | ||
1420 | if (IS_ERR(n)) { | ||
1421 | rt->rt_gateway = orig_gw; | ||
1422 | return; | ||
1423 | } | ||
1424 | old_n = xchg(&rt->dst._neighbour, n); | ||
1425 | if (old_n) | ||
1426 | neigh_release(old_n); | ||
1427 | if (!(n->nud_state & NUD_VALID)) { | ||
1428 | neigh_event_send(n, NULL); | ||
1429 | } else { | ||
1430 | rt->rt_flags |= RTCF_REDIRECTED; | ||
1431 | call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); | ||
1432 | } | ||
1433 | } | ||
1434 | |||
1271 | /* called in rcu_read_lock() section */ | 1435 | /* called in rcu_read_lock() section */ |
1272 | void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | 1436 | void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, |
1273 | __be32 saddr, struct net_device *dev) | 1437 | __be32 saddr, struct net_device *dev) |
1274 | { | 1438 | { |
1439 | int s, i; | ||
1275 | struct in_device *in_dev = __in_dev_get_rcu(dev); | 1440 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
1441 | __be32 skeys[2] = { saddr, 0 }; | ||
1442 | int ikeys[2] = { dev->ifindex, 0 }; | ||
1276 | struct inet_peer *peer; | 1443 | struct inet_peer *peer; |
1277 | struct net *net; | 1444 | struct net *net; |
1278 | 1445 | ||
@@ -1295,13 +1462,45 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1295 | goto reject_redirect; | 1462 | goto reject_redirect; |
1296 | } | 1463 | } |
1297 | 1464 | ||
1298 | peer = inet_getpeer_v4(daddr, 1); | 1465 | for (s = 0; s < 2; s++) { |
1299 | if (peer) { | 1466 | for (i = 0; i < 2; i++) { |
1300 | peer->redirect_learned.a4 = new_gw; | 1467 | unsigned int hash; |
1468 | struct rtable __rcu **rthp; | ||
1469 | struct rtable *rt; | ||
1301 | 1470 | ||
1302 | inet_putpeer(peer); | 1471 | hash = rt_hash(daddr, skeys[s], ikeys[i], rt_genid(net)); |
1472 | |||
1473 | rthp = &rt_hash_table[hash].chain; | ||
1303 | 1474 | ||
1304 | atomic_inc(&__rt_peer_genid); | 1475 | while ((rt = rcu_dereference(*rthp)) != NULL) { |
1476 | rthp = &rt->dst.rt_next; | ||
1477 | |||
1478 | if (rt->rt_key_dst != daddr || | ||
1479 | rt->rt_key_src != skeys[s] || | ||
1480 | rt->rt_oif != ikeys[i] || | ||
1481 | rt_is_input_route(rt) || | ||
1482 | rt_is_expired(rt) || | ||
1483 | !net_eq(dev_net(rt->dst.dev), net) || | ||
1484 | rt->dst.error || | ||
1485 | rt->dst.dev != dev || | ||
1486 | rt->rt_gateway != old_gw) | ||
1487 | continue; | ||
1488 | |||
1489 | if (!rt->peer) | ||
1490 | rt_bind_peer(rt, rt->rt_dst, 1); | ||
1491 | |||
1492 | peer = rt->peer; | ||
1493 | if (peer) { | ||
1494 | if (peer->redirect_learned.a4 != new_gw || | ||
1495 | peer->redirect_genid != redirect_genid) { | ||
1496 | peer->redirect_learned.a4 = new_gw; | ||
1497 | peer->redirect_genid = redirect_genid; | ||
1498 | atomic_inc(&__rt_peer_genid); | ||
1499 | } | ||
1500 | check_peer_redir(&rt->dst, peer); | ||
1501 | } | ||
1502 | } | ||
1503 | } | ||
1305 | } | 1504 | } |
1306 | return; | 1505 | return; |
1307 | 1506 | ||
@@ -1439,20 +1638,20 @@ static int ip_error(struct sk_buff *skb) | |||
1439 | int code; | 1638 | int code; |
1440 | 1639 | ||
1441 | switch (rt->dst.error) { | 1640 | switch (rt->dst.error) { |
1442 | case EINVAL: | 1641 | case EINVAL: |
1443 | default: | 1642 | default: |
1444 | goto out; | 1643 | goto out; |
1445 | case EHOSTUNREACH: | 1644 | case EHOSTUNREACH: |
1446 | code = ICMP_HOST_UNREACH; | 1645 | code = ICMP_HOST_UNREACH; |
1447 | break; | 1646 | break; |
1448 | case ENETUNREACH: | 1647 | case ENETUNREACH: |
1449 | code = ICMP_NET_UNREACH; | 1648 | code = ICMP_NET_UNREACH; |
1450 | IP_INC_STATS_BH(dev_net(rt->dst.dev), | 1649 | IP_INC_STATS_BH(dev_net(rt->dst.dev), |
1451 | IPSTATS_MIB_INNOROUTES); | 1650 | IPSTATS_MIB_INNOROUTES); |
1452 | break; | 1651 | break; |
1453 | case EACCES: | 1652 | case EACCES: |
1454 | code = ICMP_PKT_FILTERED; | 1653 | code = ICMP_PKT_FILTERED; |
1455 | break; | 1654 | break; |
1456 | } | 1655 | } |
1457 | 1656 | ||
1458 | if (!rt->peer) | 1657 | if (!rt->peer) |
@@ -1531,11 +1730,10 @@ unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph, | |||
1531 | est_mtu = mtu; | 1730 | est_mtu = mtu; |
1532 | peer->pmtu_learned = mtu; | 1731 | peer->pmtu_learned = mtu; |
1533 | peer->pmtu_expires = pmtu_expires; | 1732 | peer->pmtu_expires = pmtu_expires; |
1733 | atomic_inc(&__rt_peer_genid); | ||
1534 | } | 1734 | } |
1535 | 1735 | ||
1536 | inet_putpeer(peer); | 1736 | inet_putpeer(peer); |
1537 | |||
1538 | atomic_inc(&__rt_peer_genid); | ||
1539 | } | 1737 | } |
1540 | return est_mtu ? : new_mtu; | 1738 | return est_mtu ? : new_mtu; |
1541 | } | 1739 | } |
@@ -1588,37 +1786,9 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) | |||
1588 | } | 1786 | } |
1589 | } | 1787 | } |
1590 | 1788 | ||
1591 | static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) | ||
1592 | { | ||
1593 | struct rtable *rt = (struct rtable *) dst; | ||
1594 | __be32 orig_gw = rt->rt_gateway; | ||
1595 | |||
1596 | dst_confirm(&rt->dst); | ||
1597 | |||
1598 | neigh_release(rt->dst.neighbour); | ||
1599 | rt->dst.neighbour = NULL; | ||
1600 | |||
1601 | rt->rt_gateway = peer->redirect_learned.a4; | ||
1602 | if (arp_bind_neighbour(&rt->dst) || | ||
1603 | !(rt->dst.neighbour->nud_state & NUD_VALID)) { | ||
1604 | if (rt->dst.neighbour) | ||
1605 | neigh_event_send(rt->dst.neighbour, NULL); | ||
1606 | rt->rt_gateway = orig_gw; | ||
1607 | return -EAGAIN; | ||
1608 | } else { | ||
1609 | rt->rt_flags |= RTCF_REDIRECTED; | ||
1610 | call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, | ||
1611 | rt->dst.neighbour); | ||
1612 | } | ||
1613 | return 0; | ||
1614 | } | ||
1615 | 1789 | ||
1616 | static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) | 1790 | static void ipv4_validate_peer(struct rtable *rt) |
1617 | { | 1791 | { |
1618 | struct rtable *rt = (struct rtable *) dst; | ||
1619 | |||
1620 | if (rt_is_expired(rt)) | ||
1621 | return NULL; | ||
1622 | if (rt->rt_peer_genid != rt_peer_genid()) { | 1792 | if (rt->rt_peer_genid != rt_peer_genid()) { |
1623 | struct inet_peer *peer; | 1793 | struct inet_peer *peer; |
1624 | 1794 | ||
@@ -1627,17 +1797,26 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) | |||
1627 | 1797 | ||
1628 | peer = rt->peer; | 1798 | peer = rt->peer; |
1629 | if (peer) { | 1799 | if (peer) { |
1630 | check_peer_pmtu(dst, peer); | 1800 | check_peer_pmtu(&rt->dst, peer); |
1631 | 1801 | ||
1802 | if (peer->redirect_genid != redirect_genid) | ||
1803 | peer->redirect_learned.a4 = 0; | ||
1632 | if (peer->redirect_learned.a4 && | 1804 | if (peer->redirect_learned.a4 && |
1633 | peer->redirect_learned.a4 != rt->rt_gateway) { | 1805 | peer->redirect_learned.a4 != rt->rt_gateway) |
1634 | if (check_peer_redir(dst, peer)) | 1806 | check_peer_redir(&rt->dst, peer); |
1635 | return NULL; | ||
1636 | } | ||
1637 | } | 1807 | } |
1638 | 1808 | ||
1639 | rt->rt_peer_genid = rt_peer_genid(); | 1809 | rt->rt_peer_genid = rt_peer_genid(); |
1640 | } | 1810 | } |
1811 | } | ||
1812 | |||
1813 | static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) | ||
1814 | { | ||
1815 | struct rtable *rt = (struct rtable *) dst; | ||
1816 | |||
1817 | if (rt_is_expired(rt)) | ||
1818 | return NULL; | ||
1819 | ipv4_validate_peer(rt); | ||
1641 | return dst; | 1820 | return dst; |
1642 | } | 1821 | } |
1643 | 1822 | ||
@@ -1703,7 +1882,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) | |||
1703 | memset(&fl4, 0, sizeof(fl4)); | 1882 | memset(&fl4, 0, sizeof(fl4)); |
1704 | fl4.daddr = iph->daddr; | 1883 | fl4.daddr = iph->daddr; |
1705 | fl4.saddr = iph->saddr; | 1884 | fl4.saddr = iph->saddr; |
1706 | fl4.flowi4_tos = iph->tos; | 1885 | fl4.flowi4_tos = RT_TOS(iph->tos); |
1707 | fl4.flowi4_oif = rt->dst.dev->ifindex; | 1886 | fl4.flowi4_oif = rt->dst.dev->ifindex; |
1708 | fl4.flowi4_iif = skb->dev->ifindex; | 1887 | fl4.flowi4_iif = skb->dev->ifindex; |
1709 | fl4.flowi4_mark = skb->mark; | 1888 | fl4.flowi4_mark = skb->mark; |
@@ -1780,6 +1959,8 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, | |||
1780 | dst_init_metrics(&rt->dst, peer->metrics, false); | 1959 | dst_init_metrics(&rt->dst, peer->metrics, false); |
1781 | 1960 | ||
1782 | check_peer_pmtu(&rt->dst, peer); | 1961 | check_peer_pmtu(&rt->dst, peer); |
1962 | if (peer->redirect_genid != redirect_genid) | ||
1963 | peer->redirect_learned.a4 = 0; | ||
1783 | if (peer->redirect_learned.a4 && | 1964 | if (peer->redirect_learned.a4 && |
1784 | peer->redirect_learned.a4 != rt->rt_gateway) { | 1965 | peer->redirect_learned.a4 != rt->rt_gateway) { |
1785 | rt->rt_gateway = peer->redirect_learned.a4; | 1966 | rt->rt_gateway = peer->redirect_learned.a4; |
@@ -2280,12 +2461,12 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2280 | rth = rcu_dereference(rth->dst.rt_next)) { | 2461 | rth = rcu_dereference(rth->dst.rt_next)) { |
2281 | if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) | | 2462 | if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) | |
2282 | ((__force u32)rth->rt_key_src ^ (__force u32)saddr) | | 2463 | ((__force u32)rth->rt_key_src ^ (__force u32)saddr) | |
2283 | (rth->rt_iif ^ iif) | | 2464 | (rth->rt_route_iif ^ iif) | |
2284 | rth->rt_oif | | ||
2285 | (rth->rt_key_tos ^ tos)) == 0 && | 2465 | (rth->rt_key_tos ^ tos)) == 0 && |
2286 | rth->rt_mark == skb->mark && | 2466 | rth->rt_mark == skb->mark && |
2287 | net_eq(dev_net(rth->dst.dev), net) && | 2467 | net_eq(dev_net(rth->dst.dev), net) && |
2288 | !rt_is_expired(rth)) { | 2468 | !rt_is_expired(rth)) { |
2469 | ipv4_validate_peer(rth); | ||
2289 | if (noref) { | 2470 | if (noref) { |
2290 | dst_use_noref(&rth->dst, jiffies); | 2471 | dst_use_noref(&rth->dst, jiffies); |
2291 | skb_dst_set_noref(skb, &rth->dst); | 2472 | skb_dst_set_noref(skb, &rth->dst); |
@@ -2344,11 +2525,11 @@ EXPORT_SYMBOL(ip_route_input_common); | |||
2344 | static struct rtable *__mkroute_output(const struct fib_result *res, | 2525 | static struct rtable *__mkroute_output(const struct fib_result *res, |
2345 | const struct flowi4 *fl4, | 2526 | const struct flowi4 *fl4, |
2346 | __be32 orig_daddr, __be32 orig_saddr, | 2527 | __be32 orig_daddr, __be32 orig_saddr, |
2347 | int orig_oif, struct net_device *dev_out, | 2528 | int orig_oif, __u8 orig_rtos, |
2529 | struct net_device *dev_out, | ||
2348 | unsigned int flags) | 2530 | unsigned int flags) |
2349 | { | 2531 | { |
2350 | struct fib_info *fi = res->fi; | 2532 | struct fib_info *fi = res->fi; |
2351 | u32 tos = RT_FL_TOS(fl4); | ||
2352 | struct in_device *in_dev; | 2533 | struct in_device *in_dev; |
2353 | u16 type = res->type; | 2534 | u16 type = res->type; |
2354 | struct rtable *rth; | 2535 | struct rtable *rth; |
@@ -2399,7 +2580,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, | |||
2399 | rth->rt_genid = rt_genid(dev_net(dev_out)); | 2580 | rth->rt_genid = rt_genid(dev_net(dev_out)); |
2400 | rth->rt_flags = flags; | 2581 | rth->rt_flags = flags; |
2401 | rth->rt_type = type; | 2582 | rth->rt_type = type; |
2402 | rth->rt_key_tos = tos; | 2583 | rth->rt_key_tos = orig_rtos; |
2403 | rth->rt_dst = fl4->daddr; | 2584 | rth->rt_dst = fl4->daddr; |
2404 | rth->rt_src = fl4->saddr; | 2585 | rth->rt_src = fl4->saddr; |
2405 | rth->rt_route_iif = 0; | 2586 | rth->rt_route_iif = 0; |
@@ -2449,7 +2630,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, | |||
2449 | static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) | 2630 | static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) |
2450 | { | 2631 | { |
2451 | struct net_device *dev_out = NULL; | 2632 | struct net_device *dev_out = NULL; |
2452 | u32 tos = RT_FL_TOS(fl4); | 2633 | __u8 tos = RT_FL_TOS(fl4); |
2453 | unsigned int flags = 0; | 2634 | unsigned int flags = 0; |
2454 | struct fib_result res; | 2635 | struct fib_result res; |
2455 | struct rtable *rth; | 2636 | struct rtable *rth; |
@@ -2625,7 +2806,7 @@ static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) | |||
2625 | 2806 | ||
2626 | make_route: | 2807 | make_route: |
2627 | rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif, | 2808 | rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif, |
2628 | dev_out, flags); | 2809 | tos, dev_out, flags); |
2629 | if (!IS_ERR(rth)) { | 2810 | if (!IS_ERR(rth)) { |
2630 | unsigned int hash; | 2811 | unsigned int hash; |
2631 | 2812 | ||
@@ -2661,6 +2842,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4) | |||
2661 | (IPTOS_RT_MASK | RTO_ONLINK)) && | 2842 | (IPTOS_RT_MASK | RTO_ONLINK)) && |
2662 | net_eq(dev_net(rth->dst.dev), net) && | 2843 | net_eq(dev_net(rth->dst.dev), net) && |
2663 | !rt_is_expired(rth)) { | 2844 | !rt_is_expired(rth)) { |
2845 | ipv4_validate_peer(rth); | ||
2664 | dst_use(&rth->dst, jiffies); | 2846 | dst_use(&rth->dst, jiffies); |
2665 | RT_CACHE_STAT_INC(out_hit); | 2847 | RT_CACHE_STAT_INC(out_hit); |
2666 | rcu_read_unlock_bh(); | 2848 | rcu_read_unlock_bh(); |
@@ -2708,6 +2890,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { | |||
2708 | .default_advmss = ipv4_default_advmss, | 2890 | .default_advmss = ipv4_default_advmss, |
2709 | .update_pmtu = ipv4_rt_blackhole_update_pmtu, | 2891 | .update_pmtu = ipv4_rt_blackhole_update_pmtu, |
2710 | .cow_metrics = ipv4_rt_blackhole_cow_metrics, | 2892 | .cow_metrics = ipv4_rt_blackhole_cow_metrics, |
2893 | .neigh_lookup = ipv4_neigh_lookup, | ||
2711 | }; | 2894 | }; |
2712 | 2895 | ||
2713 | struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) | 2896 | struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) |
@@ -3088,6 +3271,13 @@ static ctl_table ipv4_route_table[] = { | |||
3088 | .proc_handler = proc_dointvec_jiffies, | 3271 | .proc_handler = proc_dointvec_jiffies, |
3089 | }, | 3272 | }, |
3090 | { | 3273 | { |
3274 | .procname = "gc_interval", | ||
3275 | .data = &ip_rt_gc_interval, | ||
3276 | .maxlen = sizeof(int), | ||
3277 | .mode = 0644, | ||
3278 | .proc_handler = proc_dointvec_jiffies, | ||
3279 | }, | ||
3280 | { | ||
3091 | .procname = "redirect_load", | 3281 | .procname = "redirect_load", |
3092 | .data = &ip_rt_redirect_load, | 3282 | .data = &ip_rt_redirect_load, |
3093 | .maxlen = sizeof(int), | 3283 | .maxlen = sizeof(int), |
@@ -3297,13 +3487,18 @@ int __init ip_rt_init(void) | |||
3297 | devinet_init(); | 3487 | devinet_init(); |
3298 | ip_fib_init(); | 3488 | ip_fib_init(); |
3299 | 3489 | ||
3490 | INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func); | ||
3491 | expires_ljiffies = jiffies; | ||
3492 | schedule_delayed_work(&expires_work, | ||
3493 | net_random() % ip_rt_gc_interval + ip_rt_gc_interval); | ||
3494 | |||
3300 | if (ip_rt_proc_init()) | 3495 | if (ip_rt_proc_init()) |
3301 | printk(KERN_ERR "Unable to create route proc files\n"); | 3496 | printk(KERN_ERR "Unable to create route proc files\n"); |
3302 | #ifdef CONFIG_XFRM | 3497 | #ifdef CONFIG_XFRM |
3303 | xfrm_init(); | 3498 | xfrm_init(); |
3304 | xfrm4_init(ip_rt_max_size); | 3499 | xfrm4_init(ip_rt_max_size); |
3305 | #endif | 3500 | #endif |
3306 | rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL); | 3501 | rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL); |
3307 | 3502 | ||
3308 | #ifdef CONFIG_SYSCTL | 3503 | #ifdef CONFIG_SYSCTL |
3309 | register_pernet_subsys(&sysctl_route_ops); | 3504 | register_pernet_subsys(&sysctl_route_ops); |
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 26461492a84..3bc5c8f7c71 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c | |||
@@ -276,7 +276,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
276 | int mss; | 276 | int mss; |
277 | struct rtable *rt; | 277 | struct rtable *rt; |
278 | __u8 rcv_wscale; | 278 | __u8 rcv_wscale; |
279 | bool ecn_ok; | 279 | bool ecn_ok = false; |
280 | 280 | ||
281 | if (!sysctl_tcp_syncookies || !th->ack || th->rst) | 281 | if (!sysctl_tcp_syncookies || !th->ack || th->rst) |
282 | goto out; | 282 | goto out; |
@@ -316,6 +316,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
316 | ireq->wscale_ok = tcp_opt.wscale_ok; | 316 | ireq->wscale_ok = tcp_opt.wscale_ok; |
317 | ireq->tstamp_ok = tcp_opt.saw_tstamp; | 317 | ireq->tstamp_ok = tcp_opt.saw_tstamp; |
318 | req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; | 318 | req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; |
319 | treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; | ||
319 | 320 | ||
320 | /* We threw the options of the initial SYN away, so we hope | 321 | /* We threw the options of the initial SYN away, so we hope |
321 | * the ACK carries the same options again (see RFC1122 4.2.3.8) | 322 | * the ACK carries the same options again (see RFC1122 4.2.3.8) |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 57d0752e239..69fd7201129 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -398,20 +398,6 @@ static struct ctl_table ipv4_table[] = { | |||
398 | .proc_handler = proc_dointvec_jiffies, | 398 | .proc_handler = proc_dointvec_jiffies, |
399 | }, | 399 | }, |
400 | { | 400 | { |
401 | .procname = "inet_peer_gc_mintime", | ||
402 | .data = &inet_peer_gc_mintime, | ||
403 | .maxlen = sizeof(int), | ||
404 | .mode = 0644, | ||
405 | .proc_handler = proc_dointvec_jiffies, | ||
406 | }, | ||
407 | { | ||
408 | .procname = "inet_peer_gc_maxtime", | ||
409 | .data = &inet_peer_gc_maxtime, | ||
410 | .maxlen = sizeof(int), | ||
411 | .mode = 0644, | ||
412 | .proc_handler = proc_dointvec_jiffies, | ||
413 | }, | ||
414 | { | ||
415 | .procname = "tcp_orphan_retries", | 401 | .procname = "tcp_orphan_retries", |
416 | .data = &sysctl_tcp_orphan_retries, | 402 | .data = &sysctl_tcp_orphan_retries, |
417 | .maxlen = sizeof(int), | 403 | .maxlen = sizeof(int), |
diff --git a/net/ipv4/sysfs_net_ipv4.c b/net/ipv4/sysfs_net_ipv4.c new file mode 100644 index 00000000000..0cbbf10026a --- /dev/null +++ b/net/ipv4/sysfs_net_ipv4.c | |||
@@ -0,0 +1,88 @@ | |||
1 | /* | ||
2 | * net/ipv4/sysfs_net_ipv4.c | ||
3 | * | ||
4 | * sysfs-based networking knobs (so we can, unlike with sysctl, control perms) | ||
5 | * | ||
6 | * Copyright (C) 2008 Google, Inc. | ||
7 | * | ||
8 | * Robert Love <rlove@google.com> | ||
9 | * | ||
10 | * This software is licensed under the terms of the GNU General Public | ||
11 | * License version 2, as published by the Free Software Foundation, and | ||
12 | * may be copied, distributed, and modified under those terms. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | * GNU General Public License for more details. | ||
18 | */ | ||
19 | |||
20 | #include <linux/kobject.h> | ||
21 | #include <linux/string.h> | ||
22 | #include <linux/sysfs.h> | ||
23 | #include <linux/init.h> | ||
24 | #include <net/tcp.h> | ||
25 | |||
26 | #define CREATE_IPV4_FILE(_name, _var) \ | ||
27 | static ssize_t _name##_show(struct kobject *kobj, \ | ||
28 | struct kobj_attribute *attr, char *buf) \ | ||
29 | { \ | ||
30 | return sprintf(buf, "%d\n", _var); \ | ||
31 | } \ | ||
32 | static ssize_t _name##_store(struct kobject *kobj, \ | ||
33 | struct kobj_attribute *attr, \ | ||
34 | const char *buf, size_t count) \ | ||
35 | { \ | ||
36 | int val, ret; \ | ||
37 | ret = sscanf(buf, "%d", &val); \ | ||
38 | if (ret != 1) \ | ||
39 | return -EINVAL; \ | ||
40 | if (val < 0) \ | ||
41 | return -EINVAL; \ | ||
42 | _var = val; \ | ||
43 | return count; \ | ||
44 | } \ | ||
45 | static struct kobj_attribute _name##_attr = \ | ||
46 | __ATTR(_name, 0644, _name##_show, _name##_store) | ||
47 | |||
48 | CREATE_IPV4_FILE(tcp_wmem_min, sysctl_tcp_wmem[0]); | ||
49 | CREATE_IPV4_FILE(tcp_wmem_def, sysctl_tcp_wmem[1]); | ||
50 | CREATE_IPV4_FILE(tcp_wmem_max, sysctl_tcp_wmem[2]); | ||
51 | |||
52 | CREATE_IPV4_FILE(tcp_rmem_min, sysctl_tcp_rmem[0]); | ||
53 | CREATE_IPV4_FILE(tcp_rmem_def, sysctl_tcp_rmem[1]); | ||
54 | CREATE_IPV4_FILE(tcp_rmem_max, sysctl_tcp_rmem[2]); | ||
55 | |||
56 | static struct attribute *ipv4_attrs[] = { | ||
57 | &tcp_wmem_min_attr.attr, | ||
58 | &tcp_wmem_def_attr.attr, | ||
59 | &tcp_wmem_max_attr.attr, | ||
60 | &tcp_rmem_min_attr.attr, | ||
61 | &tcp_rmem_def_attr.attr, | ||
62 | &tcp_rmem_max_attr.attr, | ||
63 | NULL | ||
64 | }; | ||
65 | |||
66 | static struct attribute_group ipv4_attr_group = { | ||
67 | .attrs = ipv4_attrs, | ||
68 | }; | ||
69 | |||
70 | static __init int sysfs_ipv4_init(void) | ||
71 | { | ||
72 | struct kobject *ipv4_kobject; | ||
73 | int ret; | ||
74 | |||
75 | ipv4_kobject = kobject_create_and_add("ipv4", kernel_kobj); | ||
76 | if (!ipv4_kobject) | ||
77 | return -ENOMEM; | ||
78 | |||
79 | ret = sysfs_create_group(ipv4_kobject, &ipv4_attr_group); | ||
80 | if (ret) { | ||
81 | kobject_put(ipv4_kobject); | ||
82 | return ret; | ||
83 | } | ||
84 | |||
85 | return 0; | ||
86 | } | ||
87 | |||
88 | subsys_initcall(sysfs_ipv4_init); | ||
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 46febcacb72..09ced58e6a5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -266,11 +266,15 @@ | |||
266 | #include <linux/crypto.h> | 266 | #include <linux/crypto.h> |
267 | #include <linux/time.h> | 267 | #include <linux/time.h> |
268 | #include <linux/slab.h> | 268 | #include <linux/slab.h> |
269 | #include <linux/uid_stat.h> | ||
269 | 270 | ||
270 | #include <net/icmp.h> | 271 | #include <net/icmp.h> |
271 | #include <net/tcp.h> | 272 | #include <net/tcp.h> |
272 | #include <net/xfrm.h> | 273 | #include <net/xfrm.h> |
273 | #include <net/ip.h> | 274 | #include <net/ip.h> |
275 | #include <net/ip6_route.h> | ||
276 | #include <net/ipv6.h> | ||
277 | #include <net/transp_v6.h> | ||
274 | #include <net/netdma.h> | 278 | #include <net/netdma.h> |
275 | #include <net/sock.h> | 279 | #include <net/sock.h> |
276 | 280 | ||
@@ -1112,6 +1116,9 @@ out: | |||
1112 | if (copied) | 1116 | if (copied) |
1113 | tcp_push(sk, flags, mss_now, tp->nonagle); | 1117 | tcp_push(sk, flags, mss_now, tp->nonagle); |
1114 | release_sock(sk); | 1118 | release_sock(sk); |
1119 | |||
1120 | if (copied > 0) | ||
1121 | uid_stat_tcp_snd(current_uid(), copied); | ||
1115 | return copied; | 1122 | return copied; |
1116 | 1123 | ||
1117 | do_fault: | 1124 | do_fault: |
@@ -1388,8 +1395,11 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, | |||
1388 | tcp_rcv_space_adjust(sk); | 1395 | tcp_rcv_space_adjust(sk); |
1389 | 1396 | ||
1390 | /* Clean up data we have read: This will do ACK frames. */ | 1397 | /* Clean up data we have read: This will do ACK frames. */ |
1391 | if (copied > 0) | 1398 | if (copied > 0) { |
1392 | tcp_cleanup_rbuf(sk, copied); | 1399 | tcp_cleanup_rbuf(sk, copied); |
1400 | uid_stat_tcp_rcv(current_uid(), copied); | ||
1401 | } | ||
1402 | |||
1393 | return copied; | 1403 | return copied; |
1394 | } | 1404 | } |
1395 | EXPORT_SYMBOL(tcp_read_sock); | 1405 | EXPORT_SYMBOL(tcp_read_sock); |
@@ -1771,6 +1781,9 @@ skip_copy: | |||
1771 | tcp_cleanup_rbuf(sk, copied); | 1781 | tcp_cleanup_rbuf(sk, copied); |
1772 | 1782 | ||
1773 | release_sock(sk); | 1783 | release_sock(sk); |
1784 | |||
1785 | if (copied > 0) | ||
1786 | uid_stat_tcp_rcv(current_uid(), copied); | ||
1774 | return copied; | 1787 | return copied; |
1775 | 1788 | ||
1776 | out: | 1789 | out: |
@@ -1779,6 +1792,8 @@ out: | |||
1779 | 1792 | ||
1780 | recv_urg: | 1793 | recv_urg: |
1781 | err = tcp_recv_urg(sk, msg, len, flags); | 1794 | err = tcp_recv_urg(sk, msg, len, flags); |
1795 | if (err > 0) | ||
1796 | uid_stat_tcp_rcv(current_uid(), err); | ||
1782 | goto out; | 1797 | goto out; |
1783 | } | 1798 | } |
1784 | EXPORT_SYMBOL(tcp_recvmsg); | 1799 | EXPORT_SYMBOL(tcp_recvmsg); |
@@ -3310,3 +3325,107 @@ void __init tcp_init(void) | |||
3310 | tcp_secret_retiring = &tcp_secret_two; | 3325 | tcp_secret_retiring = &tcp_secret_two; |
3311 | tcp_secret_secondary = &tcp_secret_two; | 3326 | tcp_secret_secondary = &tcp_secret_two; |
3312 | } | 3327 | } |
3328 | |||
3329 | static int tcp_is_local(struct net *net, __be32 addr) { | ||
3330 | struct rtable *rt; | ||
3331 | struct flowi4 fl4 = { .daddr = addr }; | ||
3332 | rt = ip_route_output_key(net, &fl4); | ||
3333 | if (IS_ERR_OR_NULL(rt)) | ||
3334 | return 0; | ||
3335 | return rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK); | ||
3336 | } | ||
3337 | |||
3338 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
3339 | static int tcp_is_local6(struct net *net, struct in6_addr *addr) { | ||
3340 | struct rt6_info *rt6 = rt6_lookup(net, addr, addr, 0, 0); | ||
3341 | return rt6 && rt6->rt6i_dev && (rt6->rt6i_dev->flags & IFF_LOOPBACK); | ||
3342 | } | ||
3343 | #endif | ||
3344 | |||
3345 | /* | ||
3346 | * tcp_nuke_addr - destroy all sockets on the given local address | ||
3347 | * if local address is the unspecified address (0.0.0.0 or ::), destroy all | ||
3348 | * sockets with local addresses that are not configured. | ||
3349 | */ | ||
3350 | int tcp_nuke_addr(struct net *net, struct sockaddr *addr) | ||
3351 | { | ||
3352 | int family = addr->sa_family; | ||
3353 | unsigned int bucket; | ||
3354 | |||
3355 | struct in_addr *in; | ||
3356 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
3357 | struct in6_addr *in6; | ||
3358 | #endif | ||
3359 | if (family == AF_INET) { | ||
3360 | in = &((struct sockaddr_in *)addr)->sin_addr; | ||
3361 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
3362 | } else if (family == AF_INET6) { | ||
3363 | in6 = &((struct sockaddr_in6 *)addr)->sin6_addr; | ||
3364 | #endif | ||
3365 | } else { | ||
3366 | return -EAFNOSUPPORT; | ||
3367 | } | ||
3368 | |||
3369 | for (bucket = 0; bucket < tcp_hashinfo.ehash_mask; bucket++) { | ||
3370 | struct hlist_nulls_node *node; | ||
3371 | struct sock *sk; | ||
3372 | spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, bucket); | ||
3373 | |||
3374 | restart: | ||
3375 | spin_lock_bh(lock); | ||
3376 | sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[bucket].chain) { | ||
3377 | struct inet_sock *inet = inet_sk(sk); | ||
3378 | |||
3379 | if (sysctl_ip_dynaddr && sk->sk_state == TCP_SYN_SENT) | ||
3380 | continue; | ||
3381 | if (sock_flag(sk, SOCK_DEAD)) | ||
3382 | continue; | ||
3383 | |||
3384 | if (family == AF_INET) { | ||
3385 | __be32 s4 = inet->inet_rcv_saddr; | ||
3386 | if (s4 == LOOPBACK4_IPV6) | ||
3387 | continue; | ||
3388 | |||
3389 | if (in->s_addr != s4 && | ||
3390 | !(in->s_addr == INADDR_ANY && | ||
3391 | !tcp_is_local(net, s4))) | ||
3392 | continue; | ||
3393 | } | ||
3394 | |||
3395 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
3396 | if (family == AF_INET6) { | ||
3397 | struct in6_addr *s6; | ||
3398 | if (!inet->pinet6) | ||
3399 | continue; | ||
3400 | |||
3401 | s6 = &inet->pinet6->rcv_saddr; | ||
3402 | if (ipv6_addr_type(s6) == IPV6_ADDR_MAPPED) | ||
3403 | continue; | ||
3404 | |||
3405 | if (!ipv6_addr_equal(in6, s6) && | ||
3406 | !(ipv6_addr_equal(in6, &in6addr_any) && | ||
3407 | !tcp_is_local6(net, s6))) | ||
3408 | continue; | ||
3409 | } | ||
3410 | #endif | ||
3411 | |||
3412 | sock_hold(sk); | ||
3413 | spin_unlock_bh(lock); | ||
3414 | |||
3415 | local_bh_disable(); | ||
3416 | bh_lock_sock(sk); | ||
3417 | sk->sk_err = ETIMEDOUT; | ||
3418 | sk->sk_error_report(sk); | ||
3419 | |||
3420 | tcp_done(sk); | ||
3421 | bh_unlock_sock(sk); | ||
3422 | local_bh_enable(); | ||
3423 | sock_put(sk); | ||
3424 | |||
3425 | goto restart; | ||
3426 | } | ||
3427 | spin_unlock_bh(lock); | ||
3428 | } | ||
3429 | |||
3430 | return 0; | ||
3431 | } | ||
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bef9f04c22b..d73aab3fbfc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -880,6 +880,11 @@ static void tcp_init_metrics(struct sock *sk) | |||
880 | tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH); | 880 | tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH); |
881 | if (tp->snd_ssthresh > tp->snd_cwnd_clamp) | 881 | if (tp->snd_ssthresh > tp->snd_cwnd_clamp) |
882 | tp->snd_ssthresh = tp->snd_cwnd_clamp; | 882 | tp->snd_ssthresh = tp->snd_cwnd_clamp; |
883 | } else { | ||
884 | /* ssthresh may have been reduced unnecessarily during | ||
885 | * 3WHS. Restore it back to its initial default. | ||
886 | */ | ||
887 | tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; | ||
883 | } | 888 | } |
884 | if (dst_metric(dst, RTAX_REORDERING) && | 889 | if (dst_metric(dst, RTAX_REORDERING) && |
885 | tp->reordering != dst_metric(dst, RTAX_REORDERING)) { | 890 | tp->reordering != dst_metric(dst, RTAX_REORDERING)) { |
@@ -887,10 +892,7 @@ static void tcp_init_metrics(struct sock *sk) | |||
887 | tp->reordering = dst_metric(dst, RTAX_REORDERING); | 892 | tp->reordering = dst_metric(dst, RTAX_REORDERING); |
888 | } | 893 | } |
889 | 894 | ||
890 | if (dst_metric(dst, RTAX_RTT) == 0) | 895 | if (dst_metric(dst, RTAX_RTT) == 0 || tp->srtt == 0) |
891 | goto reset; | ||
892 | |||
893 | if (!tp->srtt && dst_metric_rtt(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3)) | ||
894 | goto reset; | 896 | goto reset; |
895 | 897 | ||
896 | /* Initial rtt is determined from SYN,SYN-ACK. | 898 | /* Initial rtt is determined from SYN,SYN-ACK. |
@@ -916,19 +918,26 @@ static void tcp_init_metrics(struct sock *sk) | |||
916 | tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); | 918 | tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); |
917 | } | 919 | } |
918 | tcp_set_rto(sk); | 920 | tcp_set_rto(sk); |
919 | if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) { | ||
920 | reset: | 921 | reset: |
921 | /* Play conservative. If timestamps are not | 922 | if (tp->srtt == 0) { |
922 | * supported, TCP will fail to recalculate correct | 923 | /* RFC2988bis: We've failed to get a valid RTT sample from |
923 | * rtt, if initial rto is too small. FORGET ALL AND RESET! | 924 | * 3WHS. This is most likely due to retransmission, |
925 | * including spurious ones. Reset the RTO back to 3secs | ||
926 | * from the more aggressive 1sec to avoid more spurious | ||
927 | * retransmission. | ||
924 | */ | 928 | */ |
925 | if (!tp->rx_opt.saw_tstamp && tp->srtt) { | 929 | tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK; |
926 | tp->srtt = 0; | 930 | inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; |
927 | tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT; | ||
928 | inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; | ||
929 | } | ||
930 | } | 931 | } |
931 | tp->snd_cwnd = tcp_init_cwnd(tp, dst); | 932 | /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been |
933 | * retransmitted. In light of RFC2988bis' more aggressive 1sec | ||
934 | * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK | ||
935 | * retransmission has occurred. | ||
936 | */ | ||
937 | if (tp->total_retrans > 1) | ||
938 | tp->snd_cwnd = 1; | ||
939 | else | ||
940 | tp->snd_cwnd = tcp_init_cwnd(tp, dst); | ||
932 | tp->snd_cwnd_stamp = tcp_time_stamp; | 941 | tp->snd_cwnd_stamp = tcp_time_stamp; |
933 | } | 942 | } |
934 | 943 | ||
@@ -1115,7 +1124,7 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack, | |||
1115 | return 0; | 1124 | return 0; |
1116 | 1125 | ||
1117 | /* ...Then it's D-SACK, and must reside below snd_una completely */ | 1126 | /* ...Then it's D-SACK, and must reside below snd_una completely */ |
1118 | if (!after(end_seq, tp->snd_una)) | 1127 | if (after(end_seq, tp->snd_una)) |
1119 | return 0; | 1128 | return 0; |
1120 | 1129 | ||
1121 | if (!before(start_seq, tp->undo_marker)) | 1130 | if (!before(start_seq, tp->undo_marker)) |
@@ -1380,9 +1389,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, | |||
1380 | 1389 | ||
1381 | BUG_ON(!pcount); | 1390 | BUG_ON(!pcount); |
1382 | 1391 | ||
1383 | /* Tweak before seqno plays */ | 1392 | if (skb == tp->lost_skb_hint) |
1384 | if (!tcp_is_fack(tp) && tcp_is_sack(tp) && tp->lost_skb_hint && | ||
1385 | !before(TCP_SKB_CB(tp->lost_skb_hint)->seq, TCP_SKB_CB(skb)->seq)) | ||
1386 | tp->lost_cnt_hint += pcount; | 1393 | tp->lost_cnt_hint += pcount; |
1387 | 1394 | ||
1388 | TCP_SKB_CB(prev)->end_seq += shifted; | 1395 | TCP_SKB_CB(prev)->end_seq += shifted; |
@@ -3112,12 +3119,13 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) | |||
3112 | tcp_xmit_retransmit_queue(sk); | 3119 | tcp_xmit_retransmit_queue(sk); |
3113 | } | 3120 | } |
3114 | 3121 | ||
3115 | static void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt) | 3122 | void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt) |
3116 | { | 3123 | { |
3117 | tcp_rtt_estimator(sk, seq_rtt); | 3124 | tcp_rtt_estimator(sk, seq_rtt); |
3118 | tcp_set_rto(sk); | 3125 | tcp_set_rto(sk); |
3119 | inet_csk(sk)->icsk_backoff = 0; | 3126 | inet_csk(sk)->icsk_backoff = 0; |
3120 | } | 3127 | } |
3128 | EXPORT_SYMBOL(tcp_valid_rtt_meas); | ||
3121 | 3129 | ||
3122 | /* Read draft-ietf-tcplw-high-performance before mucking | 3130 | /* Read draft-ietf-tcplw-high-performance before mucking |
3123 | * with this code. (Supersedes RFC1323) | 3131 | * with this code. (Supersedes RFC1323) |
@@ -5806,12 +5814,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
5806 | tp->rx_opt.snd_wscale; | 5814 | tp->rx_opt.snd_wscale; |
5807 | tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); | 5815 | tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); |
5808 | 5816 | ||
5809 | /* tcp_ack considers this ACK as duplicate | ||
5810 | * and does not calculate rtt. | ||
5811 | * Force it here. | ||
5812 | */ | ||
5813 | tcp_ack_update_rtt(sk, 0, 0); | ||
5814 | |||
5815 | if (tp->rx_opt.tstamp_ok) | 5817 | if (tp->rx_opt.tstamp_ok) |
5816 | tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; | 5818 | tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; |
5817 | 5819 | ||
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 708dc203b03..6cdf6a28f6b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -72,6 +72,7 @@ | |||
72 | #include <net/timewait_sock.h> | 72 | #include <net/timewait_sock.h> |
73 | #include <net/xfrm.h> | 73 | #include <net/xfrm.h> |
74 | #include <net/netdma.h> | 74 | #include <net/netdma.h> |
75 | #include <net/secure_seq.h> | ||
75 | 76 | ||
76 | #include <linux/inet.h> | 77 | #include <linux/inet.h> |
77 | #include <linux/ipv6.h> | 78 | #include <linux/ipv6.h> |
@@ -429,8 +430,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) | |||
429 | break; | 430 | break; |
430 | 431 | ||
431 | icsk->icsk_backoff--; | 432 | icsk->icsk_backoff--; |
432 | inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) << | 433 | inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) : |
433 | icsk->icsk_backoff; | 434 | TCP_TIMEOUT_INIT) << icsk->icsk_backoff; |
434 | tcp_bound_rto(sk); | 435 | tcp_bound_rto(sk); |
435 | 436 | ||
436 | skb = tcp_write_queue_head(sk); | 437 | skb = tcp_write_queue_head(sk); |
@@ -629,7 +630,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) | |||
629 | arg.iov[0].iov_len = sizeof(rep.th); | 630 | arg.iov[0].iov_len = sizeof(rep.th); |
630 | 631 | ||
631 | #ifdef CONFIG_TCP_MD5SIG | 632 | #ifdef CONFIG_TCP_MD5SIG |
632 | key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL; | 633 | key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : NULL; |
633 | if (key) { | 634 | if (key) { |
634 | rep.opt[0] = htonl((TCPOPT_NOP << 24) | | 635 | rep.opt[0] = htonl((TCPOPT_NOP << 24) | |
635 | (TCPOPT_NOP << 16) | | 636 | (TCPOPT_NOP << 16) | |
@@ -807,20 +808,38 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) | |||
807 | kfree(inet_rsk(req)->opt); | 808 | kfree(inet_rsk(req)->opt); |
808 | } | 809 | } |
809 | 810 | ||
810 | static void syn_flood_warning(const struct sk_buff *skb) | 811 | /* |
812 | * Return 1 if a syncookie should be sent | ||
813 | */ | ||
814 | int tcp_syn_flood_action(struct sock *sk, | ||
815 | const struct sk_buff *skb, | ||
816 | const char *proto) | ||
811 | { | 817 | { |
812 | const char *msg; | 818 | const char *msg = "Dropping request"; |
819 | int want_cookie = 0; | ||
820 | struct listen_sock *lopt; | ||
821 | |||
822 | |||
813 | 823 | ||
814 | #ifdef CONFIG_SYN_COOKIES | 824 | #ifdef CONFIG_SYN_COOKIES |
815 | if (sysctl_tcp_syncookies) | 825 | if (sysctl_tcp_syncookies) { |
816 | msg = "Sending cookies"; | 826 | msg = "Sending cookies"; |
817 | else | 827 | want_cookie = 1; |
828 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); | ||
829 | } else | ||
818 | #endif | 830 | #endif |
819 | msg = "Dropping request"; | 831 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); |
820 | 832 | ||
821 | pr_info("TCP: Possible SYN flooding on port %d. %s.\n", | 833 | lopt = inet_csk(sk)->icsk_accept_queue.listen_opt; |
822 | ntohs(tcp_hdr(skb)->dest), msg); | 834 | if (!lopt->synflood_warned) { |
835 | lopt->synflood_warned = 1; | ||
836 | pr_info("%s: Possible SYN flooding on port %d. %s. " | ||
837 | " Check SNMP counters.\n", | ||
838 | proto, ntohs(tcp_hdr(skb)->dest), msg); | ||
839 | } | ||
840 | return want_cookie; | ||
823 | } | 841 | } |
842 | EXPORT_SYMBOL(tcp_syn_flood_action); | ||
824 | 843 | ||
825 | /* | 844 | /* |
826 | * Save and compile IPv4 options into the request_sock if needed. | 845 | * Save and compile IPv4 options into the request_sock if needed. |
@@ -908,18 +927,21 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, | |||
908 | } | 927 | } |
909 | sk_nocaps_add(sk, NETIF_F_GSO_MASK); | 928 | sk_nocaps_add(sk, NETIF_F_GSO_MASK); |
910 | } | 929 | } |
911 | if (tcp_alloc_md5sig_pool(sk) == NULL) { | 930 | |
931 | md5sig = tp->md5sig_info; | ||
932 | if (md5sig->entries4 == 0 && | ||
933 | tcp_alloc_md5sig_pool(sk) == NULL) { | ||
912 | kfree(newkey); | 934 | kfree(newkey); |
913 | return -ENOMEM; | 935 | return -ENOMEM; |
914 | } | 936 | } |
915 | md5sig = tp->md5sig_info; | ||
916 | 937 | ||
917 | if (md5sig->alloced4 == md5sig->entries4) { | 938 | if (md5sig->alloced4 == md5sig->entries4) { |
918 | keys = kmalloc((sizeof(*keys) * | 939 | keys = kmalloc((sizeof(*keys) * |
919 | (md5sig->entries4 + 1)), GFP_ATOMIC); | 940 | (md5sig->entries4 + 1)), GFP_ATOMIC); |
920 | if (!keys) { | 941 | if (!keys) { |
921 | kfree(newkey); | 942 | kfree(newkey); |
922 | tcp_free_md5sig_pool(); | 943 | if (md5sig->entries4 == 0) |
944 | tcp_free_md5sig_pool(); | ||
923 | return -ENOMEM; | 945 | return -ENOMEM; |
924 | } | 946 | } |
925 | 947 | ||
@@ -963,6 +985,7 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr) | |||
963 | kfree(tp->md5sig_info->keys4); | 985 | kfree(tp->md5sig_info->keys4); |
964 | tp->md5sig_info->keys4 = NULL; | 986 | tp->md5sig_info->keys4 = NULL; |
965 | tp->md5sig_info->alloced4 = 0; | 987 | tp->md5sig_info->alloced4 = 0; |
988 | tcp_free_md5sig_pool(); | ||
966 | } else if (tp->md5sig_info->entries4 != i) { | 989 | } else if (tp->md5sig_info->entries4 != i) { |
967 | /* Need to do some manipulation */ | 990 | /* Need to do some manipulation */ |
968 | memmove(&tp->md5sig_info->keys4[i], | 991 | memmove(&tp->md5sig_info->keys4[i], |
@@ -970,7 +993,6 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr) | |||
970 | (tp->md5sig_info->entries4 - i) * | 993 | (tp->md5sig_info->entries4 - i) * |
971 | sizeof(struct tcp4_md5sig_key)); | 994 | sizeof(struct tcp4_md5sig_key)); |
972 | } | 995 | } |
973 | tcp_free_md5sig_pool(); | ||
974 | return 0; | 996 | return 0; |
975 | } | 997 | } |
976 | } | 998 | } |
@@ -1234,11 +1256,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1234 | __be32 saddr = ip_hdr(skb)->saddr; | 1256 | __be32 saddr = ip_hdr(skb)->saddr; |
1235 | __be32 daddr = ip_hdr(skb)->daddr; | 1257 | __be32 daddr = ip_hdr(skb)->daddr; |
1236 | __u32 isn = TCP_SKB_CB(skb)->when; | 1258 | __u32 isn = TCP_SKB_CB(skb)->when; |
1237 | #ifdef CONFIG_SYN_COOKIES | ||
1238 | int want_cookie = 0; | 1259 | int want_cookie = 0; |
1239 | #else | ||
1240 | #define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */ | ||
1241 | #endif | ||
1242 | 1260 | ||
1243 | /* Never answer to SYNs sent to broadcast or multicast */ | 1261 | /* Never answer to SYNs sent to broadcast or multicast */ |
1244 | if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) | 1262 | if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) |
@@ -1249,14 +1267,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1249 | * evidently real one. | 1267 | * evidently real one. |
1250 | */ | 1268 | */ |
1251 | if (inet_csk_reqsk_queue_is_full(sk) && !isn) { | 1269 | if (inet_csk_reqsk_queue_is_full(sk) && !isn) { |
1252 | if (net_ratelimit()) | 1270 | want_cookie = tcp_syn_flood_action(sk, skb, "TCP"); |
1253 | syn_flood_warning(skb); | 1271 | if (!want_cookie) |
1254 | #ifdef CONFIG_SYN_COOKIES | 1272 | goto drop; |
1255 | if (sysctl_tcp_syncookies) { | ||
1256 | want_cookie = 1; | ||
1257 | } else | ||
1258 | #endif | ||
1259 | goto drop; | ||
1260 | } | 1273 | } |
1261 | 1274 | ||
1262 | /* Accept backlog is full. If we have already queued enough | 1275 | /* Accept backlog is full. If we have already queued enough |
@@ -1302,9 +1315,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1302 | while (l-- > 0) | 1315 | while (l-- > 0) |
1303 | *c++ ^= *hash_location++; | 1316 | *c++ ^= *hash_location++; |
1304 | 1317 | ||
1305 | #ifdef CONFIG_SYN_COOKIES | ||
1306 | want_cookie = 0; /* not our kind of cookie */ | 1318 | want_cookie = 0; /* not our kind of cookie */ |
1307 | #endif | ||
1308 | tmp_ext.cookie_out_never = 0; /* false */ | 1319 | tmp_ext.cookie_out_never = 0; /* false */ |
1309 | tmp_ext.cookie_plus = tmp_opt.cookie_plus; | 1320 | tmp_ext.cookie_plus = tmp_opt.cookie_plus; |
1310 | } else if (!tp->rx_opt.cookie_in_always) { | 1321 | } else if (!tp->rx_opt.cookie_in_always) { |
@@ -1384,6 +1395,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1384 | isn = tcp_v4_init_sequence(skb); | 1395 | isn = tcp_v4_init_sequence(skb); |
1385 | } | 1396 | } |
1386 | tcp_rsk(req)->snt_isn = isn; | 1397 | tcp_rsk(req)->snt_isn = isn; |
1398 | tcp_rsk(req)->snt_synack = tcp_time_stamp; | ||
1387 | 1399 | ||
1388 | if (tcp_v4_send_synack(sk, dst, req, | 1400 | if (tcp_v4_send_synack(sk, dst, req, |
1389 | (struct request_values *)&tmp_ext) || | 1401 | (struct request_values *)&tmp_ext) || |
@@ -1458,6 +1470,10 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1458 | newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; | 1470 | newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; |
1459 | 1471 | ||
1460 | tcp_initialize_rcv_mss(newsk); | 1472 | tcp_initialize_rcv_mss(newsk); |
1473 | if (tcp_rsk(req)->snt_synack) | ||
1474 | tcp_valid_rtt_meas(newsk, | ||
1475 | tcp_time_stamp - tcp_rsk(req)->snt_synack); | ||
1476 | newtp->total_retrans = req->retrans; | ||
1461 | 1477 | ||
1462 | #ifdef CONFIG_TCP_MD5SIG | 1478 | #ifdef CONFIG_TCP_MD5SIG |
1463 | /* Copy over the MD5 key from the original socket */ | 1479 | /* Copy over the MD5 key from the original socket */ |
@@ -1855,7 +1871,7 @@ static int tcp_v4_init_sock(struct sock *sk) | |||
1855 | * algorithms that we must have the following bandaid to talk | 1871 | * algorithms that we must have the following bandaid to talk |
1856 | * efficiently to them. -DaveM | 1872 | * efficiently to them. -DaveM |
1857 | */ | 1873 | */ |
1858 | tp->snd_cwnd = 2; | 1874 | tp->snd_cwnd = TCP_INIT_CWND; |
1859 | 1875 | ||
1860 | /* See draft-stevens-tcpca-spec-01 for discussion of the | 1876 | /* See draft-stevens-tcpca-spec-01 for discussion of the |
1861 | * initialization of these values. | 1877 | * initialization of these values. |
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 80b1f80759a..0ce3d06dce6 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -328,6 +328,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) | |||
328 | struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); | 328 | struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); |
329 | const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); | 329 | const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); |
330 | 330 | ||
331 | tw->tw_transparent = inet_sk(sk)->transparent; | ||
331 | tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; | 332 | tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; |
332 | tcptw->tw_rcv_nxt = tp->rcv_nxt; | 333 | tcptw->tw_rcv_nxt = tp->rcv_nxt; |
333 | tcptw->tw_snd_nxt = tp->snd_nxt; | 334 | tcptw->tw_snd_nxt = tp->snd_nxt; |
@@ -486,7 +487,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
486 | * algorithms that we must have the following bandaid to talk | 487 | * algorithms that we must have the following bandaid to talk |
487 | * efficiently to them. -DaveM | 488 | * efficiently to them. -DaveM |
488 | */ | 489 | */ |
489 | newtp->snd_cwnd = 2; | 490 | newtp->snd_cwnd = TCP_INIT_CWND; |
490 | newtp->snd_cwnd_cnt = 0; | 491 | newtp->snd_cwnd_cnt = 0; |
491 | newtp->bytes_acked = 0; | 492 | newtp->bytes_acked = 0; |
492 | 493 | ||
@@ -720,6 +721,10 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | |||
720 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); | 721 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); |
721 | return NULL; | 722 | return NULL; |
722 | } | 723 | } |
724 | if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr) | ||
725 | tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr; | ||
726 | else if (req->retrans) /* don't take RTT sample if retrans && ~TS */ | ||
727 | tcp_rsk(req)->snt_synack = 0; | ||
723 | 728 | ||
724 | /* OK, ACK is valid, create big socket and | 729 | /* OK, ACK is valid, create big socket and |
725 | * feed this segment to it. It will repeat all | 730 | * feed this segment to it. It will repeat all |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 882e0b0964d..faf257b9415 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -1134,11 +1134,9 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) | |||
1134 | sk_mem_uncharge(sk, len); | 1134 | sk_mem_uncharge(sk, len); |
1135 | sock_set_flag(sk, SOCK_QUEUE_SHRUNK); | 1135 | sock_set_flag(sk, SOCK_QUEUE_SHRUNK); |
1136 | 1136 | ||
1137 | /* Any change of skb->len requires recalculation of tso | 1137 | /* Any change of skb->len requires recalculation of tso factor. */ |
1138 | * factor and mss. | ||
1139 | */ | ||
1140 | if (tcp_skb_pcount(skb) > 1) | 1138 | if (tcp_skb_pcount(skb) > 1) |
1141 | tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk)); | 1139 | tcp_set_skb_tso_segs(sk, skb, tcp_skb_mss(skb)); |
1142 | 1140 | ||
1143 | return 0; | 1141 | return 0; |
1144 | } | 1142 | } |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 198f75b7bdd..1b5a19340a9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -105,6 +105,7 @@ | |||
105 | #include <net/route.h> | 105 | #include <net/route.h> |
106 | #include <net/checksum.h> | 106 | #include <net/checksum.h> |
107 | #include <net/xfrm.h> | 107 | #include <net/xfrm.h> |
108 | #include <trace/events/udp.h> | ||
108 | #include "udp_impl.h" | 109 | #include "udp_impl.h" |
109 | 110 | ||
110 | struct udp_table udp_table __read_mostly; | 111 | struct udp_table udp_table __read_mostly; |
@@ -1366,6 +1367,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
1366 | is_udplite); | 1367 | is_udplite); |
1367 | UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); | 1368 | UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); |
1368 | kfree_skb(skb); | 1369 | kfree_skb(skb); |
1370 | trace_udp_fail_queue_rcv_skb(rc, sk); | ||
1369 | return -1; | 1371 | return -1; |
1370 | } | 1372 | } |
1371 | 1373 | ||
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 981e43eaf70..a0b4c5da8d4 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c | |||
@@ -79,13 +79,13 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, | |||
79 | struct rtable *rt = (struct rtable *)xdst->route; | 79 | struct rtable *rt = (struct rtable *)xdst->route; |
80 | const struct flowi4 *fl4 = &fl->u.ip4; | 80 | const struct flowi4 *fl4 = &fl->u.ip4; |
81 | 81 | ||
82 | rt->rt_key_dst = fl4->daddr; | 82 | xdst->u.rt.rt_key_dst = fl4->daddr; |
83 | rt->rt_key_src = fl4->saddr; | 83 | xdst->u.rt.rt_key_src = fl4->saddr; |
84 | rt->rt_key_tos = fl4->flowi4_tos; | 84 | xdst->u.rt.rt_key_tos = fl4->flowi4_tos; |
85 | rt->rt_route_iif = fl4->flowi4_iif; | 85 | xdst->u.rt.rt_route_iif = fl4->flowi4_iif; |
86 | rt->rt_iif = fl4->flowi4_iif; | 86 | xdst->u.rt.rt_iif = fl4->flowi4_iif; |
87 | rt->rt_oif = fl4->flowi4_oif; | 87 | xdst->u.rt.rt_oif = fl4->flowi4_oif; |
88 | rt->rt_mark = fl4->flowi4_mark; | 88 | xdst->u.rt.rt_mark = fl4->flowi4_mark; |
89 | 89 | ||
90 | xdst->u.dst.dev = dev; | 90 | xdst->u.dst.dev = dev; |
91 | dev_hold(dev); | 91 | dev_hold(dev); |
@@ -117,7 +117,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) | |||
117 | memset(fl4, 0, sizeof(struct flowi4)); | 117 | memset(fl4, 0, sizeof(struct flowi4)); |
118 | fl4->flowi4_mark = skb->mark; | 118 | fl4->flowi4_mark = skb->mark; |
119 | 119 | ||
120 | if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { | 120 | if (!ip_is_fragment(iph)) { |
121 | switch (iph->protocol) { | 121 | switch (iph->protocol) { |
122 | case IPPROTO_UDP: | 122 | case IPPROTO_UDP: |
123 | case IPPROTO_UDPLITE: | 123 | case IPPROTO_UDPLITE: |