Diffstat (limited to 'net/ipv4')
33 files changed, 492 insertions, 693 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index eae1f676f87..1b745d412cf 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -465,8 +465,10 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	if (addr_len < sizeof(struct sockaddr_in))
 		goto out;
 
-	if (addr->sin_family != AF_INET)
+	if (addr->sin_family != AF_INET) {
+		err = -EAFNOSUPPORT;
 		goto out;
+	}
 
 	chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
 
@@ -1438,11 +1440,11 @@ EXPORT_SYMBOL_GPL(inet_ctl_sock_create);
 unsigned long snmp_fold_field(void __percpu *mib[], int offt)
 {
 	unsigned long res = 0;
-	int i;
+	int i, j;
 
 	for_each_possible_cpu(i) {
-		res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt);
-		res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt);
+		for (j = 0; j < SNMP_ARRAY_SZ; j++)
+			res += *(((unsigned long *) per_cpu_ptr(mib[j], i)) + offt);
 	}
 	return res;
 }
@@ -1456,28 +1458,19 @@ u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset)
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
-		void *bhptr, *userptr;
+		void *bhptr;
 		struct u64_stats_sync *syncp;
-		u64 v_bh, v_user;
+		u64 v;
 		unsigned int start;
 
-		/* first mib used by softirq context, we must use _bh() accessors */
-		bhptr = per_cpu_ptr(SNMP_STAT_BHPTR(mib), cpu);
+		bhptr = per_cpu_ptr(mib[0], cpu);
 		syncp = (struct u64_stats_sync *)(bhptr + syncp_offset);
 		do {
 			start = u64_stats_fetch_begin_bh(syncp);
-			v_bh = *(((u64 *) bhptr) + offt);
+			v = *(((u64 *) bhptr) + offt);
 		} while (u64_stats_fetch_retry_bh(syncp, start));
 
-		/* second mib used in USER context */
-		userptr = per_cpu_ptr(SNMP_STAT_USRPTR(mib), cpu);
-		syncp = (struct u64_stats_sync *)(userptr + syncp_offset);
-		do {
-			start = u64_stats_fetch_begin(syncp);
-			v_user = *(((u64 *) userptr) + offt);
-		} while (u64_stats_fetch_retry(syncp, start));
-
-		res += v_bh + v_user;
+		res += v;
 	}
 	return res;
 }
@@ -1489,25 +1482,28 @@ int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align)
 	BUG_ON(ptr == NULL);
 	ptr[0] = __alloc_percpu(mibsize, align);
 	if (!ptr[0])
-		goto err0;
+		return -ENOMEM;
+#if SNMP_ARRAY_SZ == 2
 	ptr[1] = __alloc_percpu(mibsize, align);
-	if (!ptr[1])
-		goto err1;
+	if (!ptr[1]) {
+		free_percpu(ptr[0]);
+		ptr[0] = NULL;
+		return -ENOMEM;
+	}
+#endif
 	return 0;
-err1:
-	free_percpu(ptr[0]);
-	ptr[0] = NULL;
-err0:
-	return -ENOMEM;
 }
 EXPORT_SYMBOL_GPL(snmp_mib_init);
 
-void snmp_mib_free(void __percpu *ptr[2])
+void snmp_mib_free(void __percpu *ptr[SNMP_ARRAY_SZ])
 {
+	int i;
+
 	BUG_ON(ptr == NULL);
-	free_percpu(ptr[0]);
-	free_percpu(ptr[1]);
-	ptr[0] = ptr[1] = NULL;
+	for (i = 0; i < SNMP_ARRAY_SZ; i++) {
+		free_percpu(ptr[i]);
+		ptr[i] = NULL;
+	}
 }
 EXPORT_SYMBOL_GPL(snmp_mib_free);
 
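Note: the snmp_fold_field()/snmp_mib_init()/snmp_mib_free() hunks above replace the hard-coded two-element MIB array with loops over SNMP_ARRAY_SZ, so the same code works whether one or two per-CPU arrays are allocated. A minimal userspace model of that fold, with plain arrays standing in for the kernel's per_cpu_ptr() machinery (all names here are illustrative):

	#include <stdio.h>

	#define NR_CPUS       4
	#define SNMP_ARRAY_SZ 2   /* would be 1 where no USER/BH split is needed */
	#define NR_FIELDS     8

	static unsigned long mib[SNMP_ARRAY_SZ][NR_CPUS][NR_FIELDS];

	static unsigned long fold_field(int offt)
	{
		unsigned long res = 0;
		int i, j;

		/* sum every CPU's slot across all SNMP_ARRAY_SZ arrays */
		for (i = 0; i < NR_CPUS; i++)
			for (j = 0; j < SNMP_ARRAY_SZ; j++)
				res += mib[j][i][offt];
		return res;
	}

	int main(void)
	{
		mib[0][0][3] = 5;	/* e.g. softirq-context increments on CPU 0 */
		mib[1][2][3] = 7;	/* process-context increments on CPU 2 */
		printf("field 3 = %lu\n", fold_field(3));	/* prints 12 */
		return 0;
	}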
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 1b74d3b6437..96a164aa136 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -97,7 +97,6 @@
 #include <linux/init.h>
 #include <linux/net.h>
 #include <linux/rcupdate.h>
-#include <linux/jhash.h>
 #include <linux/slab.h>
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
@@ -139,8 +138,6 @@ static const struct neigh_ops arp_generic_ops = {
 	.error_report =		arp_error_report,
 	.output =		neigh_resolve_output,
 	.connected_output =	neigh_connected_output,
-	.hh_output =		dev_queue_xmit,
-	.queue_xmit =		dev_queue_xmit,
 };
 
 static const struct neigh_ops arp_hh_ops = {
@@ -149,16 +146,12 @@ static const struct neigh_ops arp_hh_ops = {
 	.error_report =		arp_error_report,
 	.output =		neigh_resolve_output,
 	.connected_output =	neigh_resolve_output,
-	.hh_output =		dev_queue_xmit,
-	.queue_xmit =		dev_queue_xmit,
 };
 
 static const struct neigh_ops arp_direct_ops = {
 	.family =		AF_INET,
-	.output =		dev_queue_xmit,
-	.connected_output =	dev_queue_xmit,
-	.hh_output =		dev_queue_xmit,
-	.queue_xmit =		dev_queue_xmit,
+	.output =		neigh_direct_output,
+	.connected_output =	neigh_direct_output,
 };
 
 static const struct neigh_ops arp_broken_ops = {
@@ -167,8 +160,6 @@ static const struct neigh_ops arp_broken_ops = {
 	.error_report =		arp_error_report,
 	.output =		neigh_compat_output,
 	.connected_output =	neigh_compat_output,
-	.hh_output =		dev_queue_xmit,
-	.queue_xmit =		dev_queue_xmit,
 };
 
 struct neigh_table arp_tbl = {
@@ -232,7 +223,7 @@ static u32 arp_hash(const void *pkey,
 		    const struct net_device *dev,
 		    __u32 hash_rnd)
 {
-	return jhash_2words(*(u32 *)pkey, dev->ifindex, hash_rnd);
+	return arp_hashfn(*(u32 *)pkey, dev, hash_rnd);
 }
 
 static int arp_constructor(struct neighbour *neigh)
@@ -259,7 +250,7 @@ static int arp_constructor(struct neighbour *neigh)
 	if (!dev->header_ops) {
 		neigh->nud_state = NUD_NOARP;
 		neigh->ops = &arp_direct_ops;
-		neigh->output = neigh->ops->queue_xmit;
+		neigh->output = neigh_direct_output;
 	} else {
 		/* Good devices (checked by reading texts, but only Ethernet is
 		   tested)
@@ -518,30 +509,6 @@ EXPORT_SYMBOL(arp_find);
 
 /* END OF OBSOLETE FUNCTIONS */
 
-int arp_bind_neighbour(struct dst_entry *dst)
-{
-	struct net_device *dev = dst->dev;
-	struct neighbour *n = dst->neighbour;
-
-	if (dev == NULL)
-		return -EINVAL;
-	if (n == NULL) {
-		__be32 nexthop = ((struct rtable *)dst)->rt_gateway;
-		if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
-			nexthop = 0;
-		n = __neigh_lookup_errno(
-#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
-		    dev->type == ARPHRD_ATM ?
-		    clip_tbl_hook :
-#endif
-		    &arp_tbl, &nexthop, dev);
-		if (IS_ERR(n))
-			return PTR_ERR(n);
-		dst->neighbour = n;
-	}
-	return 0;
-}
-
 /*
  *	Check if we can use proxy ARP for this path
  */
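Note: the arp.c hunks above drop the per-table hh_output/queue_xmit hooks: every neigh_ops keeps only output and connected_output, and devices without a link-layer header use the shared neigh_direct_output() instead of a private dev_queue_xmit pointer. A toy model of that slimmed ops-table shape (types and names below are stand-ins, not the kernel's):

	#include <stdio.h>

	struct packet { const char *payload; };

	static int queue_xmit(struct packet *p)
	{
		printf("xmit: %s\n", p->payload);
		return 0;
	}

	/* counterpart of neigh_direct_output(): no resolution step at all */
	static int direct_output(struct packet *p)
	{
		return queue_xmit(p);
	}

	struct neigh_ops_model {
		int (*output)(struct packet *);
		int (*connected_output)(struct packet *);
	};

	static const struct neigh_ops_model direct_ops = {
		.output           = direct_output,
		.connected_output = direct_output,
	};

	int main(void)
	{
		struct packet p = { "hello" };
		return direct_ops.output(&p);	/* header-less device path */
	}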
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 0d4a184af16..37b3c188d8b 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1833,8 +1833,8 @@ void __init devinet_init(void)
 
 	rtnl_af_register(&inet_af_ops);
 
-	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
-	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
-	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
+	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
+	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
+	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
 }
 
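Note: this hunk, like the fib_frontend.c, ipmr.c and inet_diag.c ones below, only adapts callers to a widened registration signature: rtnl_register() (and netlink_dump_start()) gained one trailing callback argument, and existing callers pass NULL (or 0) to keep the old behaviour. A compilable sketch of the pattern; the calcit typedef and its "dump-size estimator" role are assumptions for illustration, not taken from this diff:

	#include <stddef.h>

	typedef int (*doit_fn)(void);
	typedef int (*dumpit_fn)(void);
	typedef unsigned int (*calcit_fn)(void);	/* assumed semantics */

	struct rtnl_slot {
		doit_fn   doit;
		dumpit_fn dumpit;
		calcit_fn calcit;	/* the new fifth slot; NULL = old behaviour */
	};

	static struct rtnl_slot table[32];

	static void register_msg(unsigned int msgtype, doit_fn doit,
				 dumpit_fn dumpit, calcit_fn calcit)
	{
		table[msgtype] = (struct rtnl_slot){ doit, dumpit, calcit };
	}

	int main(void)
	{
		/* mirrors rtnl_register(PF_INET, RTM_NEWADDR, ..., NULL, NULL) */
		register_msg(20, NULL, NULL, NULL);
		return 0;
	}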
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 22524716fe7..92fc5f69f5d 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1124,9 +1124,9 @@ static struct pernet_operations fib_net_ops = {
 
 void __init ip_fib_init(void)
 {
-	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
-	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
-	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
+	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
+	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
+	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
 
 	register_pernet_subsys(&fib_net_ops);
 	register_netdevice_notifier(&fib_netdev_notifier);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 58c25ea5a5c..de9e2978476 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -110,9 +110,10 @@ struct leaf {
 
 struct leaf_info {
 	struct hlist_node hlist;
-	struct rcu_head rcu;
 	int plen;
+	u32 mask_plen; /* ntohl(inet_make_mask(plen)) */
 	struct list_head falh;
+	struct rcu_head rcu;
 };
 
 struct tnode {
@@ -451,6 +452,7 @@ static struct leaf_info *leaf_info_new(int plen)
 	struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
 	if (li) {
 		li->plen = plen;
+		li->mask_plen = ntohl(inet_make_mask(plen));
 		INIT_LIST_HEAD(&li->falh);
 	}
 	return li;
@@ -1359,10 +1361,8 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l,
 
 	hlist_for_each_entry_rcu(li, node, hhead, hlist) {
 		struct fib_alias *fa;
-		int plen = li->plen;
-		__be32 mask = inet_make_mask(plen);
 
-		if (l->key != (key & ntohl(mask)))
+		if (l->key != (key & li->mask_plen))
 			continue;
 
 		list_for_each_entry_rcu(fa, &li->falh, fa_list) {
@@ -1394,7 +1394,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l,
 #ifdef CONFIG_IP_FIB_TRIE_STATS
 			t->stats.semantic_match_passed++;
 #endif
-			res->prefixlen = plen;
+			res->prefixlen = li->plen;
 			res->nh_sel = nhsel;
 			res->type = fa->fa_type;
 			res->scope = fa->fa_info->fib_scope;
@@ -1402,7 +1402,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l,
 			res->table = tb;
 			res->fa_head = &li->falh;
 			if (!(fib_flags & FIB_LOOKUP_NOREF))
-				atomic_inc(&res->fi->fib_clntref);
+				atomic_inc(&fi->fib_clntref);
 			return 0;
 		}
 	}
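Note: the fib_trie.c change caches ntohl(inet_make_mask(plen)) in the new leaf_info->mask_plen field at creation time, so the lookup fast path does one AND and compare per leaf_info instead of rebuilding the mask each time. A standalone sketch of that computation, assuming inet_make_mask() has the usual prefix-mask semantics for 0 <= plen <= 32:

	#include <stdint.h>
	#include <stdio.h>

	static uint32_t mask_plen(int plen)
	{
		/* host-order mask with the top plen bits set */
		return plen ? ~0u << (32 - plen) : 0;
	}

	int main(void)
	{
		uint32_t key  = 0xc0a80a01;	/* 192.168.10.1, host order */
		uint32_t leaf = 0xc0a80a00;	/* 192.168.10.0/24 leaf key */

		/* one AND + compare per leaf_info, mask precomputed */
		printf("match: %d\n", leaf == (key & mask_plen(24)));	/* 1 */
		return 0;
	}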
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index c6933f2ea31..9dbe10875fb 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -16,7 +16,6 @@
 #include <linux/skbuff.h>
 #include <linux/in.h>
 #include <linux/netdevice.h>
-#include <linux/version.h>
 #include <linux/spinlock.h>
 #include <net/protocol.h>
 #include <net/gre.h>
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 5395e45dcce..23ef31baa1a 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -380,6 +380,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
 					struct icmp_bxm *param)
 {
 	struct rtable *rt, *rt2;
+	struct flowi4 fl4_dec;
 	int err;
 
 	memset(fl4, 0, sizeof(*fl4));
@@ -408,19 +409,19 @@ static struct rtable *icmp_route_lookup(struct net *net,
 	} else
 		return rt;
 
-	err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(fl4), AF_INET);
+	err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(&fl4_dec), AF_INET);
 	if (err)
 		goto relookup_failed;
 
-	if (inet_addr_type(net, fl4->saddr) == RTN_LOCAL) {
-		rt2 = __ip_route_output_key(net, fl4);
+	if (inet_addr_type(net, fl4_dec.saddr) == RTN_LOCAL) {
+		rt2 = __ip_route_output_key(net, &fl4_dec);
 		if (IS_ERR(rt2))
 			err = PTR_ERR(rt2);
 	} else {
 		struct flowi4 fl4_2 = {};
 		unsigned long orefdst;
 
-		fl4_2.daddr = fl4->saddr;
+		fl4_2.daddr = fl4_dec.saddr;
 		rt2 = ip_route_output_key(net, &fl4_2);
 		if (IS_ERR(rt2)) {
 			err = PTR_ERR(rt2);
@@ -428,7 +429,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
 		}
 		/* Ugh! */
 		orefdst = skb_in->_skb_refdst; /* save old refdst */
-		err = ip_route_input(skb_in, fl4->daddr, fl4->saddr,
+		err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr,
 				     RT_TOS(tos), rt2->dst.dev);
 
 		dst_release(&rt2->dst);
@@ -440,10 +441,11 @@ static struct rtable *icmp_route_lookup(struct net *net,
 		goto relookup_failed;
 
 	rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst,
-					    flowi4_to_flowi(fl4), NULL,
+					    flowi4_to_flowi(&fl4_dec), NULL,
 					    XFRM_LOOKUP_ICMP);
 	if (!IS_ERR(rt2)) {
 		dst_release(&rt->dst);
+		memcpy(fl4, &fl4_dec, sizeof(*fl4));
 		rt = rt2;
 	} else if (PTR_ERR(rt2) == -EPERM) {
 		if (rt)
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 3267d389843..389a2e6a17f 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -869,7 +869,7 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		}
 
 		return netlink_dump_start(idiagnl, skb, nlh,
-					  inet_diag_dump, NULL);
+					  inet_diag_dump, NULL, 0);
 	}
 
 	return inet_diag_get_exact(skb, nlh);
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
index 85a0f75dae6..ef7ae6049a5 100644
--- a/net/ipv4/inet_lro.c
+++ b/net/ipv4/inet_lro.c
@@ -146,8 +146,7 @@ static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len)
 }
 
 static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
-			  struct iphdr *iph, struct tcphdr *tcph,
-			  u16 vlan_tag, struct vlan_group *vgrp)
+			  struct iphdr *iph, struct tcphdr *tcph)
 {
 	int nr_frags;
 	__be32 *ptr;
@@ -173,8 +172,6 @@ static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
 	}
 
 	lro_desc->mss = tcp_data_len;
-	lro_desc->vgrp = vgrp;
-	lro_desc->vlan_tag = vlan_tag;
 	lro_desc->active = 1;
 
 	lro_desc->data_csum = lro_tcp_data_csum(iph, tcph,
@@ -309,29 +306,17 @@ static void lro_flush(struct net_lro_mgr *lro_mgr,
 
 	skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss;
 
-	if (lro_desc->vgrp) {
-		if (lro_mgr->features & LRO_F_NAPI)
-			vlan_hwaccel_receive_skb(lro_desc->parent,
-						 lro_desc->vgrp,
-						 lro_desc->vlan_tag);
-		else
-			vlan_hwaccel_rx(lro_desc->parent,
-					lro_desc->vgrp,
-					lro_desc->vlan_tag);
-
-	} else {
-		if (lro_mgr->features & LRO_F_NAPI)
-			netif_receive_skb(lro_desc->parent);
-		else
-			netif_rx(lro_desc->parent);
-	}
+	if (lro_mgr->features & LRO_F_NAPI)
+		netif_receive_skb(lro_desc->parent);
+	else
+		netif_rx(lro_desc->parent);
 
 	LRO_INC_STATS(lro_mgr, flushed);
 	lro_clear_desc(lro_desc);
 }
 
 static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
-			  struct vlan_group *vgrp, u16 vlan_tag, void *priv)
+			  void *priv)
 {
 	struct net_lro_desc *lro_desc;
 	struct iphdr *iph;
@@ -360,7 +345,7 @@ static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
 		goto out;
 
 	skb->ip_summed = lro_mgr->ip_summed_aggr;
-	lro_init_desc(lro_desc, skb, iph, tcph, vlan_tag, vgrp);
+	lro_init_desc(lro_desc, skb, iph, tcph);
 	LRO_INC_STATS(lro_mgr, aggregated);
 	return 0;
 }
@@ -433,8 +418,7 @@ static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr,
 static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr,
 					  struct skb_frag_struct *frags,
 					  int len, int true_size,
-					  struct vlan_group *vgrp,
-					  u16 vlan_tag, void *priv, __wsum sum)
+					  void *priv, __wsum sum)
 {
 	struct net_lro_desc *lro_desc;
 	struct iphdr *iph;
@@ -480,7 +464,7 @@ static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr,
 		tcph = (void *)((u8 *)skb->data + vlan_hdr_len
 				+ IP_HDR_LEN(iph));
 
-		lro_init_desc(lro_desc, skb, iph, tcph, 0, NULL);
+		lro_init_desc(lro_desc, skb, iph, tcph);
 		LRO_INC_STATS(lro_mgr, aggregated);
 		return NULL;
 	}
@@ -514,7 +498,7 @@ void lro_receive_skb(struct net_lro_mgr *lro_mgr,
 		     struct sk_buff *skb,
 		     void *priv)
 {
-	if (__lro_proc_skb(lro_mgr, skb, NULL, 0, priv)) {
+	if (__lro_proc_skb(lro_mgr, skb, priv)) {
 		if (lro_mgr->features & LRO_F_NAPI)
 			netif_receive_skb(skb);
 		else
@@ -523,29 +507,13 @@ void lro_receive_skb(struct net_lro_mgr *lro_mgr,
 }
 EXPORT_SYMBOL(lro_receive_skb);
 
-void lro_vlan_hwaccel_receive_skb(struct net_lro_mgr *lro_mgr,
-				  struct sk_buff *skb,
-				  struct vlan_group *vgrp,
-				  u16 vlan_tag,
-				  void *priv)
-{
-	if (__lro_proc_skb(lro_mgr, skb, vgrp, vlan_tag, priv)) {
-		if (lro_mgr->features & LRO_F_NAPI)
-			vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag);
-		else
-			vlan_hwaccel_rx(skb, vgrp, vlan_tag);
-	}
-}
-EXPORT_SYMBOL(lro_vlan_hwaccel_receive_skb);
-
 void lro_receive_frags(struct net_lro_mgr *lro_mgr,
 		       struct skb_frag_struct *frags,
 		       int len, int true_size, void *priv, __wsum sum)
 {
 	struct sk_buff *skb;
 
-	skb = __lro_proc_segment(lro_mgr, frags, len, true_size, NULL, 0,
-				 priv, sum);
+	skb = __lro_proc_segment(lro_mgr, frags, len, true_size, priv, sum);
 	if (!skb)
 		return;
 
@@ -556,26 +524,6 @@ void lro_receive_frags(struct net_lro_mgr *lro_mgr,
 }
 EXPORT_SYMBOL(lro_receive_frags);
 
-void lro_vlan_hwaccel_receive_frags(struct net_lro_mgr *lro_mgr,
-				    struct skb_frag_struct *frags,
-				    int len, int true_size,
-				    struct vlan_group *vgrp,
-				    u16 vlan_tag, void *priv, __wsum sum)
-{
-	struct sk_buff *skb;
-
-	skb = __lro_proc_segment(lro_mgr, frags, len, true_size, vgrp,
-				 vlan_tag, priv, sum);
-	if (!skb)
-		return;
-
-	if (lro_mgr->features & LRO_F_NAPI)
-		vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag);
-	else
-		vlan_hwaccel_rx(skb, vgrp, vlan_tag);
-}
-EXPORT_SYMBOL(lro_vlan_hwaccel_receive_frags);
-
 void lro_flush_all(struct net_lro_mgr *lro_mgr)
 {
 	int i;
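Note: with the vlan_group plumbing removed, every LRO delivery path above collapses to the same NAPI-or-not decision. A compilable sketch of that reduced logic, with stand-ins for the kernel types and receive functions:

	#include <stdio.h>

	#define LRO_F_NAPI 1	/* stand-in for the kernel flag */

	struct mgr { unsigned int features; };

	static void napi_rx(void *skb) { printf("netif_receive_skb(%p)\n", skb); }
	static void irq_rx(void *skb)  { printf("netif_rx(%p)\n", skb); }

	/* all that remains of lro_flush()'s delivery branch after the VLAN paths */
	static void deliver(const struct mgr *m, void *skb)
	{
		if (m->features & LRO_F_NAPI)
			napi_rx(skb);
		else
			irq_rx(skb);
	}

	int main(void)
	{
		struct mgr napi = { LRO_F_NAPI }, legacy = { 0 };
		int skb;	/* dummy "packet" */

		deliver(&napi, &skb);
		deliver(&legacy, &skb);
		return 0;
	}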
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index ce616d92cc5..e38213817d0 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -54,15 +54,11 @@
  *  1.  Nodes may appear in the tree only with the pool lock held.
  *  2.  Nodes may disappear from the tree only with the pool lock held
  *      AND reference count being 0.
- *  3.  Nodes appears and disappears from unused node list only under
- *      "inet_peer_unused_lock".
- *  4.  Global variable peer_total is modified under the pool lock.
- *  5.  struct inet_peer fields modification:
+ *  3.  Global variable peer_total is modified under the pool lock.
+ *  4.  struct inet_peer fields modification:
  *		avl_left, avl_right, avl_parent, avl_height: pool lock
- *		unused: unused node list lock
  *		refcnt: atomically against modifications on other CPU;
  *		   usually under some other lock to prevent node disappearing
- *		dtime: unused node list lock
  *		daddr: unchangeable
  *		ip_id_count: atomic value (no lock needed)
  */
@@ -104,19 +100,6 @@ int inet_peer_threshold __read_mostly = 65536 + 128;	/* start to throw entries m
 					 * aggressively at this stage */
 int inet_peer_minttl __read_mostly = 120 * HZ;	/* TTL under high load: 120 sec */
 int inet_peer_maxttl __read_mostly = 10 * 60 * HZ;	/* usual time to live: 10 min */
-int inet_peer_gc_mintime __read_mostly = 10 * HZ;
-int inet_peer_gc_maxtime __read_mostly = 120 * HZ;
-
-static struct {
-	struct list_head	list;
-	spinlock_t		lock;
-} unused_peers = {
-	.list	= LIST_HEAD_INIT(unused_peers.list),
-	.lock	= __SPIN_LOCK_UNLOCKED(unused_peers.lock),
-};
-
-static void peer_check_expire(unsigned long dummy);
-static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0);
 
 
 /* Called from ip_output.c:ip_init */
@@ -142,21 +125,6 @@ void __init inet_initpeers(void)
 			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
 			NULL);
 
-	/* All the timers, started at system startup tend
-	   to synchronize.  Perturb it a bit.
-	 */
-	peer_periodic_timer.expires = jiffies
-		+ net_random() % inet_peer_gc_maxtime
-		+ inet_peer_gc_maxtime;
-	add_timer(&peer_periodic_timer);
-}
-
-/* Called with or without local BH being disabled. */
-static void unlink_from_unused(struct inet_peer *p)
-{
-	spin_lock_bh(&unused_peers.lock);
-	list_del_init(&p->unused);
-	spin_unlock_bh(&unused_peers.lock);
 }
 
 static int addr_compare(const struct inetpeer_addr *a,
@@ -203,20 +171,6 @@ static int addr_compare(const struct inetpeer_addr *a,
 	u;							\
 })
 
-static bool atomic_add_unless_return(atomic_t *ptr, int a, int u, int *newv)
-{
-	int cur, old = atomic_read(ptr);
-
-	while (old != u) {
-		*newv = old + a;
-		cur = atomic_cmpxchg(ptr, old, *newv);
-		if (cur == old)
-			return true;
-		old = cur;
-	}
-	return false;
-}
-
 /*
  * Called with rcu_read_lock()
  * Because we hold no lock against a writer, its quite possible we fall
@@ -225,8 +179,7 @@ static bool atomic_add_unless_return(atomic_t *ptr, int a, int u, int *newv)
  * We exit from this function if number of links exceeds PEER_MAXDEPTH
 */
 static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
-				    struct inet_peer_base *base,
-				    int *newrefcnt)
+				    struct inet_peer_base *base)
 {
 	struct inet_peer *u = rcu_dereference(base->root);
 	int count = 0;
@@ -235,11 +188,9 @@ static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
 		int cmp = addr_compare(daddr, &u->daddr);
 		if (cmp == 0) {
 			/* Before taking a reference, check if this entry was
-			 * deleted, unlink_from_pool() sets refcnt=-1 to make
-			 * distinction between an unused entry (refcnt=0) and
-			 * a freed one.
+			 * deleted (refcnt=-1)
 			 */
-			if (!atomic_add_unless_return(&u->refcnt, 1, -1, newrefcnt))
+			if (!atomic_add_unless(&u->refcnt, 1, -1))
 				u = NULL;
 			return u;
 		}
@@ -366,137 +317,99 @@ static void inetpeer_free_rcu(struct rcu_head *head)
 	kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu));
 }
 
-/* May be called with local BH enabled. */
 static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base,
 			     struct inet_peer __rcu **stack[PEER_MAXDEPTH])
 {
-	int do_free;
+	struct inet_peer __rcu ***stackptr, ***delp;
 
-	do_free = 0;
-
-	write_seqlock_bh(&base->lock);
-	/* Check the reference counter.  It was artificially incremented by 1
-	 * in cleanup() function to prevent sudden disappearing.  If we can
-	 * atomically (because of lockless readers) take this last reference,
-	 * it's safe to remove the node and free it later.
-	 * We use refcnt=-1 to alert lockless readers this entry is deleted.
-	 */
-	if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) {
-		struct inet_peer __rcu ***stackptr, ***delp;
-		if (lookup(&p->daddr, stack, base) != p)
-			BUG();
-		delp = stackptr - 1; /* *delp[0] == p */
-		if (p->avl_left == peer_avl_empty_rcu) {
-			*delp[0] = p->avl_right;
-			--stackptr;
-		} else {
-			/* look for a node to insert instead of p */
-			struct inet_peer *t;
-			t = lookup_rightempty(p, base);
-			BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t);
-			**--stackptr = t->avl_left;
-			/* t is removed, t->daddr > x->daddr for any
-			 * x in p->avl_left subtree.
-			 * Put t in the old place of p. */
-			RCU_INIT_POINTER(*delp[0], t);
-			t->avl_left = p->avl_left;
-			t->avl_right = p->avl_right;
-			t->avl_height = p->avl_height;
-			BUG_ON(delp[1] != &p->avl_left);
-			delp[1] = &t->avl_left; /* was &p->avl_left */
-		}
-		peer_avl_rebalance(stack, stackptr, base);
-		base->total--;
-		do_free = 1;
+	if (lookup(&p->daddr, stack, base) != p)
+		BUG();
+	delp = stackptr - 1; /* *delp[0] == p */
+	if (p->avl_left == peer_avl_empty_rcu) {
+		*delp[0] = p->avl_right;
+		--stackptr;
+	} else {
+		/* look for a node to insert instead of p */
+		struct inet_peer *t;
+		t = lookup_rightempty(p, base);
+		BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t);
+		**--stackptr = t->avl_left;
+		/* t is removed, t->daddr > x->daddr for any
+		 * x in p->avl_left subtree.
+		 * Put t in the old place of p. */
+		RCU_INIT_POINTER(*delp[0], t);
+		t->avl_left = p->avl_left;
+		t->avl_right = p->avl_right;
+		t->avl_height = p->avl_height;
+		BUG_ON(delp[1] != &p->avl_left);
+		delp[1] = &t->avl_left; /* was &p->avl_left */
 	}
-	write_sequnlock_bh(&base->lock);
-
-	if (do_free)
-		call_rcu(&p->rcu, inetpeer_free_rcu);
-	else
-		/* The node is used again.  Decrease the reference counter
-		 * back.  The loop "cleanup -> unlink_from_unused
-		 *   -> unlink_from_pool -> putpeer -> link_to_unused
-		 *   -> cleanup (for the same node)"
-		 * doesn't really exist because the entry will have a
-		 * recent deletion time and will not be cleaned again soon.
-		 */
-		inet_putpeer(p);
+	peer_avl_rebalance(stack, stackptr, base);
+	base->total--;
+	call_rcu(&p->rcu, inetpeer_free_rcu);
 }
 
 static struct inet_peer_base *family_to_base(int family)
 {
-	return (family == AF_INET ? &v4_peers : &v6_peers);
-}
-
-static struct inet_peer_base *peer_to_base(struct inet_peer *p)
-{
-	return family_to_base(p->daddr.family);
+	return family == AF_INET ? &v4_peers : &v6_peers;
 }
 
-/* May be called with local BH enabled. */
-static int cleanup_once(unsigned long ttl, struct inet_peer __rcu **stack[PEER_MAXDEPTH])
+/* perform garbage collect on all items stacked during a lookup */
+static int inet_peer_gc(struct inet_peer_base *base,
+			struct inet_peer __rcu **stack[PEER_MAXDEPTH],
+			struct inet_peer __rcu ***stackptr)
 {
-	struct inet_peer *p = NULL;
-
-	/* Remove the first entry from the list of unused nodes. */
-	spin_lock_bh(&unused_peers.lock);
-	if (!list_empty(&unused_peers.list)) {
-		__u32 delta;
-
-		p = list_first_entry(&unused_peers.list, struct inet_peer, unused);
-		delta = (__u32)jiffies - p->dtime;
+	struct inet_peer *p, *gchead = NULL;
+	__u32 delta, ttl;
+	int cnt = 0;
 
-		if (delta < ttl) {
-			/* Do not prune fresh entries. */
-			spin_unlock_bh(&unused_peers.lock);
-			return -1;
+	if (base->total >= inet_peer_threshold)
+		ttl = 0; /* be aggressive */
+	else
+		ttl = inet_peer_maxttl
+				- (inet_peer_maxttl - inet_peer_minttl) / HZ *
+					base->total / inet_peer_threshold * HZ;
+	stackptr--; /* last stack slot is peer_avl_empty */
+	while (stackptr > stack) {
+		stackptr--;
+		p = rcu_deref_locked(**stackptr, base);
+		if (atomic_read(&p->refcnt) == 0) {
+			smp_rmb();
+			delta = (__u32)jiffies - p->dtime;
+			if (delta >= ttl &&
+			    atomic_cmpxchg(&p->refcnt, 0, -1) == 0) {
+				p->gc_next = gchead;
+				gchead = p;
+			}
 		}
-
-		list_del_init(&p->unused);
-
-		/* Grab an extra reference to prevent node disappearing
-		 * before unlink_from_pool() call. */
-		atomic_inc(&p->refcnt);
 	}
-	spin_unlock_bh(&unused_peers.lock);
-
-	if (p == NULL)
-		/* It means that the total number of USED entries has
-		 * grown over inet_peer_threshold.  It shouldn't really
-		 * happen because of entry limits in route cache. */
-		return -1;
-
-	unlink_from_pool(p, peer_to_base(p), stack);
-	return 0;
+	while ((p = gchead) != NULL) {
+		gchead = p->gc_next;
+		cnt++;
+		unlink_from_pool(p, base, stack);
+	}
+	return cnt;
 }
 
-/* Called with or without local BH being disabled. */
-struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
+struct inet_peer *inet_getpeer(const struct inetpeer_addr *daddr, int create)
 {
 	struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
 	struct inet_peer_base *base = family_to_base(daddr->family);
 	struct inet_peer *p;
 	unsigned int sequence;
-	int invalidated, newrefcnt = 0;
+	int invalidated, gccnt = 0;
 
-	/* Look up for the address quickly, lockless.
+	/* Attempt a lockless lookup first.
 	 * Because of a concurrent writer, we might not find an existing entry.
 	 */
 	rcu_read_lock();
 	sequence = read_seqbegin(&base->lock);
-	p = lookup_rcu(daddr, base, &newrefcnt);
+	p = lookup_rcu(daddr, base);
 	invalidated = read_seqretry(&base->lock, sequence);
 	rcu_read_unlock();
 
-	if (p) {
-found:		/* The existing node has been found.
-		 * Remove the entry from unused list if it was there.
-		 */
-		if (newrefcnt == 1)
-			unlink_from_unused(p);
+	if (p)
 		return p;
-	}
 
 	/* If no writer did a change during our lookup, we can return early. */
 	if (!create && !invalidated)
@@ -506,18 +419,27 @@ found:		/* The existing node has been found.
 	 * At least, nodes should be hot in our cache.
 	 */
 	write_seqlock_bh(&base->lock);
+relookup:
 	p = lookup(daddr, stack, base);
 	if (p != peer_avl_empty) {
-		newrefcnt = atomic_inc_return(&p->refcnt);
+		atomic_inc(&p->refcnt);
 		write_sequnlock_bh(&base->lock);
-		goto found;
+		return p;
+	}
+	if (!gccnt) {
+		gccnt = inet_peer_gc(base, stack, stackptr);
+		if (gccnt && create)
+			goto relookup;
 	}
 	p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
 	if (p) {
 		p->daddr = *daddr;
 		atomic_set(&p->refcnt, 1);
 		atomic_set(&p->rid, 0);
-		atomic_set(&p->ip_id_count, secure_ip_id(daddr->addr.a4));
+		atomic_set(&p->ip_id_count,
+				(daddr->family == AF_INET) ?
+					secure_ip_id(daddr->addr.a4) :
+					secure_ipv6_id(daddr->addr.a6));
 		p->tcp_ts_stamp = 0;
 		p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
 		p->rate_tokens = 0;
@@ -525,7 +447,6 @@ found:		/* The existing node has been found.
 		p->pmtu_expires = 0;
 		p->pmtu_orig = 0;
 		memset(&p->redirect_learned, 0, sizeof(p->redirect_learned));
-		INIT_LIST_HEAD(&p->unused);
 
 
 		/* Link the node. */
@@ -534,63 +455,15 @@ found:		/* The existing node has been found.
 	}
 	write_sequnlock_bh(&base->lock);
 
-	if (base->total >= inet_peer_threshold)
-		/* Remove one less-recently-used entry. */
-		cleanup_once(0, stack);
-
 	return p;
 }
-
-static int compute_total(void)
-{
-	return v4_peers.total + v6_peers.total;
-}
 EXPORT_SYMBOL_GPL(inet_getpeer);
 
-/* Called with local BH disabled. */
-static void peer_check_expire(unsigned long dummy)
-{
-	unsigned long now = jiffies;
-	int ttl, total;
-	struct inet_peer __rcu **stack[PEER_MAXDEPTH];
-
-	total = compute_total();
-	if (total >= inet_peer_threshold)
-		ttl = inet_peer_minttl;
-	else
-		ttl = inet_peer_maxttl
-			- (inet_peer_maxttl - inet_peer_minttl) / HZ *
-				total / inet_peer_threshold * HZ;
-	while (!cleanup_once(ttl, stack)) {
-		if (jiffies != now)
-			break;
-	}
-
-	/* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime
-	 * interval depending on the total number of entries (more entries,
-	 * less interval). */
-	total = compute_total();
-	if (total >= inet_peer_threshold)
-		peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime;
-	else
-		peer_periodic_timer.expires = jiffies
-			+ inet_peer_gc_maxtime
-			- (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ *
-				total / inet_peer_threshold * HZ;
-	add_timer(&peer_periodic_timer);
-}
-
 void inet_putpeer(struct inet_peer *p)
 {
-	local_bh_disable();
-
-	if (atomic_dec_and_lock(&p->refcnt, &unused_peers.lock)) {
-		list_add_tail(&p->unused, &unused_peers.list);
-		p->dtime = (__u32)jiffies;
-		spin_unlock(&unused_peers.lock);
-	}
-
-	local_bh_enable();
+	p->dtime = (__u32)jiffies;
+	smp_mb__before_atomic_dec();
+	atomic_dec(&p->refcnt);
 }
 EXPORT_SYMBOL_GPL(inet_putpeer);
 
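Note: the new inet_peer_gc() above folds the old timer-driven cleanup into the lookup path. Its TTL shrinks linearly as base->total approaches inet_peer_threshold and drops to 0 ("be aggressive") beyond it. A standalone check of that TTL arithmetic using the constants defined in this file (HZ assumed to be 100 for the example):

	#include <stdio.h>

	#define HZ 100
	static const int threshold = 65536 + 128;
	static const int minttl = 120 * HZ;	/* TTL under high load: 120 sec */
	static const int maxttl = 10 * 60 * HZ;	/* usual time to live: 10 min */

	static int gc_ttl(int total)
	{
		if (total >= threshold)
			return 0;	/* be aggressive */
		/* same integer arithmetic as the kernel hunk above */
		return maxttl - (maxttl - minttl) / HZ * total / threshold * HZ;
	}

	int main(void)
	{
		printf("empty pool:   %d jiffies\n", gc_ttl(0));		/* 60000 */
		printf("half full:    %d jiffies\n", gc_ttl(threshold / 2));
		printf("at threshold: %d jiffies\n", gc_ttl(threshold));	/* 0 */
		return 0;
	}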
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 0ad6035f636..0e0ab98abc6 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -261,8 +261,9 @@ static void ip_expire(unsigned long arg)
 		 * Only an end host needs to send an ICMP
 		 * "Fragment Reassembly Timeout" message, per RFC792.
 		 */
-		if (qp->user == IP_DEFRAG_CONNTRACK_IN &&
-		    skb_rtable(head)->rt_type != RTN_LOCAL)
+		if (qp->user == IP_DEFRAG_AF_PACKET ||
+		    (qp->user == IP_DEFRAG_CONNTRACK_IN &&
+		     skb_rtable(head)->rt_type != RTN_LOCAL))
 			goto out_rcu_unlock;
 
 
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 8871067560d..d7bb94c4834 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -731,9 +731,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 	}
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	else if (skb->protocol == htons(ETH_P_IPV6)) {
+		struct neighbour *neigh = dst_get_neighbour(skb_dst(skb));
 		const struct in6_addr *addr6;
 		int addr_type;
-		struct neighbour *neigh = skb_dst(skb)->neighbour;
 
 		if (neigh == NULL)
 			goto tx_error;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index c8f48efc5fd..073a9b01c40 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -165,7 +165,7 @@ int ip_call_ra_chain(struct sk_buff *skb)
 		    (!sk->sk_bound_dev_if ||
 		     sk->sk_bound_dev_if == dev->ifindex) &&
 		    net_eq(sock_net(sk), dev_net(dev))) {
-			if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
+			if (ip_is_fragment(ip_hdr(skb))) {
 				if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN))
 					return 1;
 			}
@@ -256,7 +256,7 @@ int ip_local_deliver(struct sk_buff *skb)
 	 *	Reassemble IP fragments.
 	 */
 
-	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
+	if (ip_is_fragment(ip_hdr(skb))) {
 		if (ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER))
 			return 0;
 	}
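Note: several hunks in this series (here, and in ip_output.c and ipconfig.c below) replace the open-coded frag_off test with the new ip_is_fragment() helper, which presumably reduces to the same one-line expression. A standalone model with just the relevant header field:

	#include <stdint.h>
	#include <stdio.h>
	#include <arpa/inet.h>

	#define IP_OFFSET 0x1FFF	/* fragment offset bits */
	#define IP_MF     0x2000	/* "more fragments" flag */

	struct iphdr_model { uint16_t frag_off; /* network byte order */ };

	/* true for any fragment: nonzero offset, or MF set on the first one */
	static int ip_is_fragment(const struct iphdr_model *iph)
	{
		return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0;
	}

	int main(void)
	{
		struct iphdr_model first = { htons(IP_MF) };	/* first fragment */
		struct iphdr_model whole = { htons(0) };	/* unfragmented */

		printf("%d %d\n", ip_is_fragment(&first), ip_is_fragment(&whole));
		return 0;	/* prints "1 0" */
	}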
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index a8024eaa0e8..ccaaa851ab4 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -182,6 +182,7 @@ static inline int ip_finish_output2(struct sk_buff *skb)
 	struct rtable *rt = (struct rtable *)dst;
 	struct net_device *dev = dst->dev;
 	unsigned int hh_len = LL_RESERVED_SPACE(dev);
+	struct neighbour *neigh;
 
 	if (rt->rt_type == RTN_MULTICAST) {
 		IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len);
@@ -203,10 +204,9 @@ static inline int ip_finish_output2(struct sk_buff *skb)
 		skb = skb2;
 	}
 
-	if (dst->hh)
-		return neigh_hh_output(dst->hh, skb);
-	else if (dst->neighbour)
-		return dst->neighbour->output(skb);
+	neigh = dst_get_neighbour(dst);
+	if (neigh)
+		return neigh_output(neigh, skb);
 
 	if (net_ratelimit())
 		printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
@@ -489,7 +489,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 
 	if (first_len - hlen > mtu ||
 	    ((first_len - hlen) & 7) ||
-	    (iph->frag_off & htons(IP_MF|IP_OFFSET)) ||
+	    ip_is_fragment(iph) ||
 	    skb_cloned(skb))
 		goto slow_path;
 
@@ -734,7 +734,7 @@ static inline int ip_ufo_append_data(struct sock *sk,
 			int getfrag(void *from, char *to, int offset, int len,
 			       int odd, struct sk_buff *skb),
 			void *from, int length, int hh_len, int fragheaderlen,
-			int transhdrlen, int mtu, unsigned int flags)
+			int transhdrlen, int maxfraglen, unsigned int flags)
 {
 	struct sk_buff *skb;
 	int err;
@@ -767,7 +767,7 @@ static inline int ip_ufo_append_data(struct sock *sk,
 		skb->csum = 0;
 
 		/* specify the length of each IP datagram fragment */
-		skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
+		skb_shinfo(skb)->gso_size = maxfraglen - fragheaderlen;
 		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
 		__skb_queue_tail(queue, skb);
 	}
@@ -802,8 +802,6 @@ static int __ip_append_data(struct sock *sk,
 	skb = skb_peek_tail(queue);
 
 	exthdrlen = !skb ? rt->dst.header_len : 0;
-	length += exthdrlen;
-	transhdrlen += exthdrlen;
 	mtu = cork->fragsize;
 
 	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
@@ -830,10 +828,10 @@ static int __ip_append_data(struct sock *sk,
 	cork->length += length;
 	if (((length > mtu) || (skb && skb_is_gso(skb))) &&
 	    (sk->sk_protocol == IPPROTO_UDP) &&
-	    (rt->dst.dev->features & NETIF_F_UFO)) {
+	    (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len) {
 		err = ip_ufo_append_data(sk, queue, getfrag, from, length,
 					 hh_len, fragheaderlen, transhdrlen,
-					 mtu, flags);
+					 maxfraglen, flags);
 		if (err)
 			goto error;
 		return 0;
@@ -883,17 +881,16 @@ alloc_new_skb:
 			else
 				alloclen = fraglen;
 
+			alloclen += exthdrlen;
+
 			/* The last fragment gets additional space at tail.
 			 * Note, with MSG_MORE we overallocate on fragments,
 			 * because we have no idea what fragment will be
 			 * the last.
 			 */
-			if (datalen == length + fraggap) {
+			if (datalen == length + fraggap)
 				alloclen += rt->dst.trailer_len;
-				/* make sure mtu is not reached */
-				if (datalen > mtu - fragheaderlen - rt->dst.trailer_len)
-					datalen -= ALIGN(rt->dst.trailer_len, 8);
-			}
+
 			if (transhdrlen) {
 				skb = sock_alloc_send_skb(sk,
 					    alloclen + hh_len + 15,
@@ -926,11 +923,11 @@ alloc_new_skb:
 			/*
 			 *	Find where to start putting bytes.
 			 */
-			data = skb_put(skb, fraglen);
+			data = skb_put(skb, fraglen + exthdrlen);
 			skb_set_network_header(skb, exthdrlen);
 			skb->transport_header = (skb->network_header +
 						 fragheaderlen);
-			data += fragheaderlen;
+			data += fragheaderlen + exthdrlen;
 
 			if (fraggap) {
 				skb->csum = skb_copy_and_csum_bits(
@@ -1064,7 +1061,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
 	 */
 	*rtp = NULL;
 	cork->fragsize = inet->pmtudisc == IP_PMTUDISC_PROBE ?
			 rt->dst.dev->mtu : dst_mtu(&rt->dst);
 	cork->dst = &rt->dst;
 	cork->length = 0;
 	cork->tx_flags = ipc->tx_flags;
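Note: the UFO hunks above switch gso_size from mtu - fragheaderlen to maxfraglen - fragheaderlen, where maxfraglen rounds the fragment payload down to the multiple of 8 that IP fragmentation requires; the two values differ whenever mtu - fragheaderlen is not 8-aligned. A quick standalone check (the maxfraglen formula is assumed from its usual definition in this file):

	#include <stdio.h>

	int main(void)
	{
		int mtu = 1500, fragheaderlen = 24;	/* IP header + 4 bytes of options */
		/* payload rounded down to a multiple of 8, plus the header */
		int maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;

		printf("old gso_size: %d\n", mtu - fragheaderlen);	  /* 1476, not 8-aligned */
		printf("new gso_size: %d\n", maxfraglen - fragheaderlen); /* 1472 */
		return 0;
	}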
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index ab7e5542c1c..472a8c4f1dc 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c | |||
@@ -861,41 +861,44 @@ static void __init ic_do_bootp_ext(u8 *ext) | |||
861 | #endif | 861 | #endif |
862 | 862 | ||
863 | switch (*ext++) { | 863 | switch (*ext++) { |
864 | case 1: /* Subnet mask */ | 864 | case 1: /* Subnet mask */ |
865 | if (ic_netmask == NONE) | 865 | if (ic_netmask == NONE) |
866 | memcpy(&ic_netmask, ext+1, 4); | 866 | memcpy(&ic_netmask, ext+1, 4); |
867 | break; | 867 | break; |
868 | case 3: /* Default gateway */ | 868 | case 3: /* Default gateway */ |
869 | if (ic_gateway == NONE) | 869 | if (ic_gateway == NONE) |
870 | memcpy(&ic_gateway, ext+1, 4); | 870 | memcpy(&ic_gateway, ext+1, 4); |
871 | break; | 871 | break; |
872 | case 6: /* DNS server */ | 872 | case 6: /* DNS server */ |
873 | servers= *ext/4; | 873 | servers= *ext/4; |
874 | if (servers > CONF_NAMESERVERS_MAX) | 874 | if (servers > CONF_NAMESERVERS_MAX) |
875 | servers = CONF_NAMESERVERS_MAX; | 875 | servers = CONF_NAMESERVERS_MAX; |
876 | for (i = 0; i < servers; i++) { | 876 | for (i = 0; i < servers; i++) { |
877 | if (ic_nameservers[i] == NONE) | 877 | if (ic_nameservers[i] == NONE) |
878 | memcpy(&ic_nameservers[i], ext+1+4*i, 4); | 878 | memcpy(&ic_nameservers[i], ext+1+4*i, 4); |
879 | } | 879 | } |
880 | break; | 880 | break; |
881 | case 12: /* Host name */ | 881 | case 12: /* Host name */ |
882 | ic_bootp_string(utsname()->nodename, ext+1, *ext, __NEW_UTS_LEN); | 882 | ic_bootp_string(utsname()->nodename, ext+1, *ext, |
883 | ic_host_name_set = 1; | 883 | __NEW_UTS_LEN); |
884 | break; | 884 | ic_host_name_set = 1; |
885 | case 15: /* Domain name (DNS) */ | 885 | break; |
886 | ic_bootp_string(ic_domain, ext+1, *ext, sizeof(ic_domain)); | 886 | case 15: /* Domain name (DNS) */ |
887 | break; | 887 | ic_bootp_string(ic_domain, ext+1, *ext, sizeof(ic_domain)); |
888 | case 17: /* Root path */ | 888 | break; |
889 | if (!root_server_path[0]) | 889 | case 17: /* Root path */ |
890 | ic_bootp_string(root_server_path, ext+1, *ext, sizeof(root_server_path)); | 890 | if (!root_server_path[0]) |
891 | break; | 891 | ic_bootp_string(root_server_path, ext+1, *ext, |
892 | case 26: /* Interface MTU */ | 892 | sizeof(root_server_path)); |
893 | memcpy(&mtu, ext+1, sizeof(mtu)); | 893 | break; |
894 | ic_dev_mtu = ntohs(mtu); | 894 | case 26: /* Interface MTU */ |
895 | break; | 895 | memcpy(&mtu, ext+1, sizeof(mtu)); |
896 | case 40: /* NIS Domain name (_not_ DNS) */ | 896 | ic_dev_mtu = ntohs(mtu); |
897 | ic_bootp_string(utsname()->domainname, ext+1, *ext, __NEW_UTS_LEN); | 897 | break; |
898 | break; | 898 | case 40: /* NIS Domain name (_not_ DNS) */ |
899 | ic_bootp_string(utsname()->domainname, ext+1, *ext, | ||
900 | __NEW_UTS_LEN); | ||
901 | break; | ||
899 | } | 902 | } |
900 | } | 903 | } |
901 | 904 | ||
@@ -932,7 +935,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str | |||
932 | goto drop; | 935 | goto drop; |
933 | 936 | ||
934 | /* Fragments are not supported */ | 937 | /* Fragments are not supported */ |
935 | if (h->frag_off & htons(IP_OFFSET | IP_MF)) { | 938 | if (ip_is_fragment(h)) { |
936 | if (net_ratelimit()) | 939 | if (net_ratelimit()) |
937 | printk(KERN_ERR "DHCP/BOOTP: Ignoring fragmented " | 940 | printk(KERN_ERR "DHCP/BOOTP: Ignoring fragmented " |
938 | "reply.\n"); | 941 | "reply.\n"); |
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 30a7763c400..aae2bd8cd92 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c | |||
@@ -2544,7 +2544,8 @@ int __init ip_mr_init(void) | |||
2544 | goto add_proto_fail; | 2544 | goto add_proto_fail; |
2545 | } | 2545 | } |
2546 | #endif | 2546 | #endif |
2547 | rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute); | 2547 | rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, |
2548 | NULL, ipmr_rtm_dumproute, NULL); | ||
2548 | return 0; | 2549 | return 0; |
2549 | 2550 | ||
2550 | #ifdef CONFIG_IP_PIMSM_V2 | 2551 | #ifdef CONFIG_IP_PIMSM_V2 |
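
The added NULL tracks a tree-wide signature change: rtnl_register() apparently gains a third callback for pre-calculating netlink dump sizes. Presumed new prototype (not part of this diff; the same change shows up in the route.c hunk below):

    /* the last argument is the new "calcit" hook; passing NULL keeps
     * the old behaviour, which is all ipmr needs here */
    void rtnl_register(int protocol, int msgtype, rtnl_doit_func doit,
                       rtnl_dumpit_func dumpit, rtnl_calcit_func calcit);
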
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 4614babdc45..2e97e3ec1eb 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c | |||
@@ -17,51 +17,35 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) | |||
17 | const struct iphdr *iph = ip_hdr(skb); | 17 | const struct iphdr *iph = ip_hdr(skb); |
18 | struct rtable *rt; | 18 | struct rtable *rt; |
19 | struct flowi4 fl4 = {}; | 19 | struct flowi4 fl4 = {}; |
20 | unsigned long orefdst; | 20 | __be32 saddr = iph->saddr; |
21 | __u8 flags = 0; | ||
21 | unsigned int hh_len; | 22 | unsigned int hh_len; |
22 | unsigned int type; | ||
23 | 23 | ||
24 | type = inet_addr_type(net, iph->saddr); | 24 | if (!skb->sk && addr_type != RTN_LOCAL) { |
25 | if (skb->sk && inet_sk(skb->sk)->transparent) | 25 | if (addr_type == RTN_UNSPEC) |
26 | type = RTN_LOCAL; | 26 | addr_type = inet_addr_type(net, saddr); |
27 | if (addr_type == RTN_UNSPEC) | 27 | if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST) |
28 | addr_type = type; | 28 | flags |= FLOWI_FLAG_ANYSRC; |
29 | else | ||
30 | saddr = 0; | ||
31 | } | ||
29 | 32 | ||
30 | /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause | 33 | /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause |
31 | * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook. | 34 | * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook. |
32 | */ | 35 | */ |
33 | if (addr_type == RTN_LOCAL) { | 36 | fl4.daddr = iph->daddr; |
34 | fl4.daddr = iph->daddr; | 37 | fl4.saddr = saddr; |
35 | if (type == RTN_LOCAL) | 38 | fl4.flowi4_tos = RT_TOS(iph->tos); |
36 | fl4.saddr = iph->saddr; | 39 | fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; |
37 | fl4.flowi4_tos = RT_TOS(iph->tos); | 40 | fl4.flowi4_mark = skb->mark; |
38 | fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; | 41 | fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : flags; |
39 | fl4.flowi4_mark = skb->mark; | 42 | rt = ip_route_output_key(net, &fl4); |
40 | fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; | 43 | if (IS_ERR(rt)) |
41 | rt = ip_route_output_key(net, &fl4); | 44 | return -1; |
42 | if (IS_ERR(rt)) | ||
43 | return -1; | ||
44 | |||
45 | /* Drop old route. */ | ||
46 | skb_dst_drop(skb); | ||
47 | skb_dst_set(skb, &rt->dst); | ||
48 | } else { | ||
49 | /* non-local src, find valid iif to satisfy | ||
50 | * rp-filter when calling ip_route_input. */ | ||
51 | fl4.daddr = iph->saddr; | ||
52 | rt = ip_route_output_key(net, &fl4); | ||
53 | if (IS_ERR(rt)) | ||
54 | return -1; | ||
55 | 45 | ||
56 | orefdst = skb->_skb_refdst; | 46 | /* Drop old route. */ |
57 | if (ip_route_input(skb, iph->daddr, iph->saddr, | 47 | skb_dst_drop(skb); |
58 | RT_TOS(iph->tos), rt->dst.dev) != 0) { | 48 | skb_dst_set(skb, &rt->dst); |
59 | dst_release(&rt->dst); | ||
60 | return -1; | ||
61 | } | ||
62 | dst_release(&rt->dst); | ||
63 | refdst_drop(orefdst); | ||
64 | } | ||
65 | 49 | ||
66 | if (skb_dst(skb)->error) | 50 | if (skb_dst(skb)->error) |
67 | return -1; | 51 | return -1; |
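
The rewrite collapses the old two-branch logic (local source via an output route, foreign source via ip_route_input()) into a single output-route lookup. Condensed, with names as in the hunk and error handling elided:

    /* for socketless packets with a non-local source: keep a known
     * local/unicast saddr and mark the flow FLOWI_FLAG_ANYSRC,
     * otherwise zero saddr and let the lookup pick a source */
    struct flowi4 fl4 = {
            .daddr        = iph->daddr,
            .saddr        = saddr,
            .flowi4_tos   = RT_TOS(iph->tos),
            .flowi4_flags = flags,
    };
    struct rtable *rt = ip_route_output_key(net, &fl4);

    if (!IS_ERR(rt)) {
            skb_dst_drop(skb);          /* drop the old route   */
            skb_dst_set(skb, &rt->dst); /* attach the fresh one */
    }
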
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 5c9e97c7901..db8d22db425 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c | |||
@@ -317,19 +317,19 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) | |||
317 | hash = clusterip_hashfn(skb, cipinfo->config); | 317 | hash = clusterip_hashfn(skb, cipinfo->config); |
318 | 318 | ||
319 | switch (ctinfo) { | 319 | switch (ctinfo) { |
320 | case IP_CT_NEW: | 320 | case IP_CT_NEW: |
321 | ct->mark = hash; | 321 | ct->mark = hash; |
322 | break; | 322 | break; |
323 | case IP_CT_RELATED: | 323 | case IP_CT_RELATED: |
324 | case IP_CT_RELATED_REPLY: | 324 | case IP_CT_RELATED_REPLY: |
325 | /* FIXME: we don't handle expectations at the | 325 | /* FIXME: we don't handle expectations at the moment. |
326 | * moment. they can arrive on a different node than | 326 | * They can arrive on a different node than |
327 | * the master connection (e.g. FTP passive mode) */ | 327 | * the master connection (e.g. FTP passive mode) */ |
328 | case IP_CT_ESTABLISHED: | 328 | case IP_CT_ESTABLISHED: |
329 | case IP_CT_ESTABLISHED_REPLY: | 329 | case IP_CT_ESTABLISHED_REPLY: |
330 | break; | 330 | break; |
331 | default: | 331 | default: /* Prevent gcc warnings */ |
332 | break; | 332 | break; |
333 | } | 333 | } |
334 | 334 | ||
335 | #ifdef DEBUG | 335 | #ifdef DEBUG |
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 1ff79e557f9..51f13f8ec72 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c | |||
@@ -40,7 +40,6 @@ static void send_reset(struct sk_buff *oldskb, int hook) | |||
40 | struct iphdr *niph; | 40 | struct iphdr *niph; |
41 | const struct tcphdr *oth; | 41 | const struct tcphdr *oth; |
42 | struct tcphdr _otcph, *tcph; | 42 | struct tcphdr _otcph, *tcph; |
43 | unsigned int addr_type; | ||
44 | 43 | ||
45 | /* IP header checks: fragment. */ | 44 | /* IP header checks: fragment. */ |
46 | if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) | 45 | if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) |
@@ -55,6 +54,9 @@ static void send_reset(struct sk_buff *oldskb, int hook) | |||
55 | if (oth->rst) | 54 | if (oth->rst) |
56 | return; | 55 | return; |
57 | 56 | ||
57 | if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) | ||
58 | return; | ||
59 | |||
58 | /* Check checksum */ | 60 | /* Check checksum */ |
59 | if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP)) | 61 | if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP)) |
60 | return; | 62 | return; |
@@ -101,19 +103,11 @@ static void send_reset(struct sk_buff *oldskb, int hook) | |||
101 | nskb->csum_start = (unsigned char *)tcph - nskb->head; | 103 | nskb->csum_start = (unsigned char *)tcph - nskb->head; |
102 | nskb->csum_offset = offsetof(struct tcphdr, check); | 104 | nskb->csum_offset = offsetof(struct tcphdr, check); |
103 | 105 | ||
104 | addr_type = RTN_UNSPEC; | ||
105 | if (hook != NF_INET_FORWARD | ||
106 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
107 | || (nskb->nf_bridge && nskb->nf_bridge->mask & BRNF_BRIDGED) | ||
108 | #endif | ||
109 | ) | ||
110 | addr_type = RTN_LOCAL; | ||
111 | |||
112 | /* ip_route_me_harder expects skb->dst to be set */ | 106 | /* ip_route_me_harder expects skb->dst to be set */ |
113 | skb_dst_set_noref(nskb, skb_dst(oldskb)); | 107 | skb_dst_set_noref(nskb, skb_dst(oldskb)); |
114 | 108 | ||
115 | nskb->protocol = htons(ETH_P_IP); | 109 | nskb->protocol = htons(ETH_P_IP); |
116 | if (ip_route_me_harder(nskb, addr_type)) | 110 | if (ip_route_me_harder(nskb, RTN_UNSPEC)) |
117 | goto free_nskb; | 111 | goto free_nskb; |
118 | 112 | ||
119 | niph->ttl = ip4_dst_hoplimit(skb_dst(nskb)); | 113 | niph->ttl = ip4_dst_hoplimit(skb_dst(nskb)); |
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index f3a9b42b16c..9bb1b8a37a2 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c | |||
@@ -82,7 +82,7 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, | |||
82 | #endif | 82 | #endif |
83 | #endif | 83 | #endif |
84 | /* Gather fragments. */ | 84 | /* Gather fragments. */ |
85 | if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { | 85 | if (ip_is_fragment(ip_hdr(skb))) { |
86 | enum ip_defrag_users user = nf_ct_defrag_user(hooknum, skb); | 86 | enum ip_defrag_users user = nf_ct_defrag_user(hooknum, skb); |
87 | if (nf_ct_ipv4_gather_frags(skb, user)) | 87 | if (nf_ct_ipv4_gather_frags(skb, user)) |
88 | return NF_STOLEN; | 88 | return NF_STOLEN; |
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 8812a02078a..076b7c8c4aa 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c | |||
@@ -719,117 +719,115 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx, | |||
719 | 719 | ||
720 | l = 0; | 720 | l = 0; |
721 | switch (type) { | 721 | switch (type) { |
722 | case SNMP_INTEGER: | 722 | case SNMP_INTEGER: |
723 | len = sizeof(long); | 723 | len = sizeof(long); |
724 | if (!asn1_long_decode(ctx, end, &l)) { | 724 | if (!asn1_long_decode(ctx, end, &l)) { |
725 | kfree(id); | 725 | kfree(id); |
726 | return 0; | 726 | return 0; |
727 | } | 727 | } |
728 | *obj = kmalloc(sizeof(struct snmp_object) + len, | 728 | *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); |
729 | GFP_ATOMIC); | 729 | if (*obj == NULL) { |
730 | if (*obj == NULL) { | 730 | kfree(id); |
731 | kfree(id); | 731 | if (net_ratelimit()) |
732 | if (net_ratelimit()) | 732 | pr_notice("OOM in bsalg (%d)\n", __LINE__); |
733 | pr_notice("OOM in bsalg (%d)\n", __LINE__); | 733 | return 0; |
734 | return 0; | 734 | } |
735 | } | 735 | (*obj)->syntax.l[0] = l; |
736 | (*obj)->syntax.l[0] = l; | 736 | break; |
737 | break; | 737 | case SNMP_OCTETSTR: |
738 | case SNMP_OCTETSTR: | 738 | case SNMP_OPAQUE: |
739 | case SNMP_OPAQUE: | 739 | if (!asn1_octets_decode(ctx, end, &p, &len)) { |
740 | if (!asn1_octets_decode(ctx, end, &p, &len)) { | 740 | kfree(id); |
741 | kfree(id); | 741 | return 0; |
742 | return 0; | 742 | } |
743 | } | 743 | *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); |
744 | *obj = kmalloc(sizeof(struct snmp_object) + len, | 744 | if (*obj == NULL) { |
745 | GFP_ATOMIC); | ||
746 | if (*obj == NULL) { | ||
747 | kfree(p); | ||
748 | kfree(id); | ||
749 | if (net_ratelimit()) | ||
750 | pr_notice("OOM in bsalg (%d)\n", __LINE__); | ||
751 | return 0; | ||
752 | } | ||
753 | memcpy((*obj)->syntax.c, p, len); | ||
754 | kfree(p); | 745 | kfree(p); |
755 | break; | 746 | kfree(id); |
756 | case SNMP_NULL: | 747 | if (net_ratelimit()) |
757 | case SNMP_NOSUCHOBJECT: | 748 | pr_notice("OOM in bsalg (%d)\n", __LINE__); |
758 | case SNMP_NOSUCHINSTANCE: | 749 | return 0; |
759 | case SNMP_ENDOFMIBVIEW: | 750 | } |
760 | len = 0; | 751 | memcpy((*obj)->syntax.c, p, len); |
761 | *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC); | 752 | kfree(p); |
762 | if (*obj == NULL) { | 753 | break; |
763 | kfree(id); | 754 | case SNMP_NULL: |
764 | if (net_ratelimit()) | 755 | case SNMP_NOSUCHOBJECT: |
765 | pr_notice("OOM in bsalg (%d)\n", __LINE__); | 756 | case SNMP_NOSUCHINSTANCE: |
766 | return 0; | 757 | case SNMP_ENDOFMIBVIEW: |
767 | } | 758 | len = 0; |
768 | if (!asn1_null_decode(ctx, end)) { | 759 | *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC); |
769 | kfree(id); | 760 | if (*obj == NULL) { |
770 | kfree(*obj); | 761 | kfree(id); |
771 | *obj = NULL; | 762 | if (net_ratelimit()) |
772 | return 0; | 763 | pr_notice("OOM in bsalg (%d)\n", __LINE__); |
773 | } | 764 | return 0; |
774 | break; | 765 | } |
775 | case SNMP_OBJECTID: | 766 | if (!asn1_null_decode(ctx, end)) { |
776 | if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) { | 767 | kfree(id); |
777 | kfree(id); | 768 | kfree(*obj); |
778 | return 0; | 769 | *obj = NULL; |
779 | } | 770 | return 0; |
780 | len *= sizeof(unsigned long); | 771 | } |
781 | *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); | 772 | break; |
782 | if (*obj == NULL) { | 773 | case SNMP_OBJECTID: |
783 | kfree(lp); | 774 | if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) { |
784 | kfree(id); | 775 | kfree(id); |
785 | if (net_ratelimit()) | 776 | return 0; |
786 | pr_notice("OOM in bsalg (%d)\n", __LINE__); | 777 | } |
787 | return 0; | 778 | len *= sizeof(unsigned long); |
788 | } | 779 | *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); |
789 | memcpy((*obj)->syntax.ul, lp, len); | 780 | if (*obj == NULL) { |
790 | kfree(lp); | 781 | kfree(lp); |
791 | break; | 782 | kfree(id); |
792 | case SNMP_IPADDR: | 783 | if (net_ratelimit()) |
793 | if (!asn1_octets_decode(ctx, end, &p, &len)) { | 784 | pr_notice("OOM in bsalg (%d)\n", __LINE__); |
794 | kfree(id); | 785 | return 0; |
795 | return 0; | 786 | } |
796 | } | 787 | memcpy((*obj)->syntax.ul, lp, len); |
797 | if (len != 4) { | 788 | kfree(lp); |
798 | kfree(p); | 789 | break; |
799 | kfree(id); | 790 | case SNMP_IPADDR: |
800 | return 0; | 791 | if (!asn1_octets_decode(ctx, end, &p, &len)) { |
801 | } | 792 | kfree(id); |
802 | *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); | 793 | return 0; |
803 | if (*obj == NULL) { | 794 | } |
804 | kfree(p); | 795 | if (len != 4) { |
805 | kfree(id); | ||
806 | if (net_ratelimit()) | ||
807 | pr_notice("OOM in bsalg (%d)\n", __LINE__); | ||
808 | return 0; | ||
809 | } | ||
810 | memcpy((*obj)->syntax.uc, p, len); | ||
811 | kfree(p); | 796 | kfree(p); |
812 | break; | ||
813 | case SNMP_COUNTER: | ||
814 | case SNMP_GAUGE: | ||
815 | case SNMP_TIMETICKS: | ||
816 | len = sizeof(unsigned long); | ||
817 | if (!asn1_ulong_decode(ctx, end, &ul)) { | ||
818 | kfree(id); | ||
819 | return 0; | ||
820 | } | ||
821 | *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); | ||
822 | if (*obj == NULL) { | ||
823 | kfree(id); | ||
824 | if (net_ratelimit()) | ||
825 | pr_notice("OOM in bsalg (%d)\n", __LINE__); | ||
826 | return 0; | ||
827 | } | ||
828 | (*obj)->syntax.ul[0] = ul; | ||
829 | break; | ||
830 | default: | ||
831 | kfree(id); | 797 | kfree(id); |
832 | return 0; | 798 | return 0; |
799 | } | ||
800 | *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); | ||
801 | if (*obj == NULL) { | ||
802 | kfree(p); | ||
803 | kfree(id); | ||
804 | if (net_ratelimit()) | ||
805 | pr_notice("OOM in bsalg (%d)\n", __LINE__); | ||
806 | return 0; | ||
807 | } | ||
808 | memcpy((*obj)->syntax.uc, p, len); | ||
809 | kfree(p); | ||
810 | break; | ||
811 | case SNMP_COUNTER: | ||
812 | case SNMP_GAUGE: | ||
813 | case SNMP_TIMETICKS: | ||
814 | len = sizeof(unsigned long); | ||
815 | if (!asn1_ulong_decode(ctx, end, &ul)) { | ||
816 | kfree(id); | ||
817 | return 0; | ||
818 | } | ||
819 | *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); | ||
820 | if (*obj == NULL) { | ||
821 | kfree(id); | ||
822 | if (net_ratelimit()) | ||
823 | pr_notice("OOM in bsalg (%d)\n", __LINE__); | ||
824 | return 0; | ||
825 | } | ||
826 | (*obj)->syntax.ul[0] = ul; | ||
827 | break; | ||
828 | default: | ||
829 | kfree(id); | ||
830 | return 0; | ||
833 | } | 831 | } |
834 | 832 | ||
835 | (*obj)->syntax_len = len; | 833 | (*obj)->syntax_len = len; |
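
The snmp_object_decode() hunk above is a pure re-indentation: every case body shifts one tab left so the labels sit at the same depth as the switch keyword, as kernel CodingStyle asks. In miniature:

    switch (type) {
    case SNMP_INTEGER:      /* label aligns with "switch", body one tab in */
            len = sizeof(long);
            break;
    default:
            break;
    }
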
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 483b76d042d..a6e606e8482 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c | |||
@@ -88,7 +88,7 @@ nf_nat_fn(unsigned int hooknum, | |||
88 | 88 | ||
89 | /* We never see fragments: conntrack defrags on pre-routing | 89 | /* We never see fragments: conntrack defrags on pre-routing |
90 | and local-out, and nf_nat_out protects post-routing. */ | 90 | and local-out, and nf_nat_out protects post-routing. */ |
91 | NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))); | 91 | NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb))); |
92 | 92 | ||
93 | ct = nf_ct_get(skb, &ctinfo); | 93 | ct = nf_ct_get(skb, &ctinfo); |
94 | /* Can't track? It's not due to stress, or conntrack would | 94 | /* Can't track? It's not due to stress, or conntrack would |
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index c9893d43242..08526786dc3 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
@@ -825,28 +825,28 @@ static int compat_raw_getsockopt(struct sock *sk, int level, int optname, | |||
825 | static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg) | 825 | static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg) |
826 | { | 826 | { |
827 | switch (cmd) { | 827 | switch (cmd) { |
828 | case SIOCOUTQ: { | 828 | case SIOCOUTQ: { |
829 | int amount = sk_wmem_alloc_get(sk); | 829 | int amount = sk_wmem_alloc_get(sk); |
830 | 830 | ||
831 | return put_user(amount, (int __user *)arg); | 831 | return put_user(amount, (int __user *)arg); |
832 | } | 832 | } |
833 | case SIOCINQ: { | 833 | case SIOCINQ: { |
834 | struct sk_buff *skb; | 834 | struct sk_buff *skb; |
835 | int amount = 0; | 835 | int amount = 0; |
836 | 836 | ||
837 | spin_lock_bh(&sk->sk_receive_queue.lock); | 837 | spin_lock_bh(&sk->sk_receive_queue.lock); |
838 | skb = skb_peek(&sk->sk_receive_queue); | 838 | skb = skb_peek(&sk->sk_receive_queue); |
839 | if (skb != NULL) | 839 | if (skb != NULL) |
840 | amount = skb->len; | 840 | amount = skb->len; |
841 | spin_unlock_bh(&sk->sk_receive_queue.lock); | 841 | spin_unlock_bh(&sk->sk_receive_queue.lock); |
842 | return put_user(amount, (int __user *)arg); | 842 | return put_user(amount, (int __user *)arg); |
843 | } | 843 | } |
844 | 844 | ||
845 | default: | 845 | default: |
846 | #ifdef CONFIG_IP_MROUTE | 846 | #ifdef CONFIG_IP_MROUTE |
847 | return ipmr_ioctl(sk, cmd, (void __user *)arg); | 847 | return ipmr_ioctl(sk, cmd, (void __user *)arg); |
848 | #else | 848 | #else |
849 | return -ENOIOCTLCMD; | 849 | return -ENOIOCTLCMD; |
850 | #endif | 850 | #endif |
851 | } | 851 | } |
852 | } | 852 | } |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index aa13ef10511..33137307d52 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -108,6 +108,7 @@ | |||
108 | #ifdef CONFIG_SYSCTL | 108 | #ifdef CONFIG_SYSCTL |
109 | #include <linux/sysctl.h> | 109 | #include <linux/sysctl.h> |
110 | #endif | 110 | #endif |
111 | #include <net/atmclip.h> | ||
111 | 112 | ||
112 | #define RT_FL_TOS(oldflp4) \ | 113 | #define RT_FL_TOS(oldflp4) \ |
113 | ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) | 114 | ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) |
@@ -184,6 +185,8 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) | |||
184 | return p; | 185 | return p; |
185 | } | 186 | } |
186 | 187 | ||
188 | static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr); | ||
189 | |||
187 | static struct dst_ops ipv4_dst_ops = { | 190 | static struct dst_ops ipv4_dst_ops = { |
188 | .family = AF_INET, | 191 | .family = AF_INET, |
189 | .protocol = cpu_to_be16(ETH_P_IP), | 192 | .protocol = cpu_to_be16(ETH_P_IP), |
@@ -198,6 +201,7 @@ static struct dst_ops ipv4_dst_ops = { | |||
198 | .link_failure = ipv4_link_failure, | 201 | .link_failure = ipv4_link_failure, |
199 | .update_pmtu = ip_rt_update_pmtu, | 202 | .update_pmtu = ip_rt_update_pmtu, |
200 | .local_out = __ip_local_out, | 203 | .local_out = __ip_local_out, |
204 | .neigh_lookup = ipv4_neigh_lookup, | ||
201 | }; | 205 | }; |
202 | 206 | ||
203 | #define ECN_OR_COST(class) TC_PRIO_##class | 207 | #define ECN_OR_COST(class) TC_PRIO_##class |
@@ -411,8 +415,10 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) | |||
411 | "HHUptod\tSpecDst"); | 415 | "HHUptod\tSpecDst"); |
412 | else { | 416 | else { |
413 | struct rtable *r = v; | 417 | struct rtable *r = v; |
418 | struct neighbour *n; | ||
414 | int len; | 419 | int len; |
415 | 420 | ||
421 | n = dst_get_neighbour(&r->dst); | ||
416 | seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" | 422 | seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" |
417 | "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", | 423 | "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", |
418 | r->dst.dev ? r->dst.dev->name : "*", | 424 | r->dst.dev ? r->dst.dev->name : "*", |
@@ -425,9 +431,8 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) | |||
425 | (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + | 431 | (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + |
426 | dst_metric(&r->dst, RTAX_RTTVAR)), | 432 | dst_metric(&r->dst, RTAX_RTTVAR)), |
427 | r->rt_key_tos, | 433 | r->rt_key_tos, |
428 | r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1, | 434 | -1, |
429 | r->dst.hh ? (r->dst.hh->hh_output == | 435 | (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0, |
430 | dev_queue_xmit) : 0, | ||
431 | r->rt_spec_dst, &len); | 436 | r->rt_spec_dst, &len); |
432 | 437 | ||
433 | seq_printf(seq, "%*s\n", 127 - len, ""); | 438 | seq_printf(seq, "%*s\n", 127 - len, ""); |
@@ -1006,6 +1011,37 @@ static int slow_chain_length(const struct rtable *head) | |||
1006 | return length >> FRACT_BITS; | 1011 | return length >> FRACT_BITS; |
1007 | } | 1012 | } |
1008 | 1013 | ||
1014 | static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr) | ||
1015 | { | ||
1016 | struct neigh_table *tbl = &arp_tbl; | ||
1017 | static const __be32 inaddr_any = 0; | ||
1018 | struct net_device *dev = dst->dev; | ||
1019 | const __be32 *pkey = daddr; | ||
1020 | struct neighbour *n; | ||
1021 | |||
1022 | #if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) | ||
1023 | if (dev->type == ARPHRD_ATM) | ||
1024 | tbl = clip_tbl_hook; | ||
1025 | #endif | ||
1026 | if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) | ||
1027 | pkey = &inaddr_any; | ||
1028 | |||
1029 | n = __ipv4_neigh_lookup(tbl, dev, *(__force u32 *)pkey); | ||
1030 | if (n) | ||
1031 | return n; | ||
1032 | return neigh_create(tbl, pkey, dev); | ||
1033 | } | ||
1034 | |||
1035 | static int rt_bind_neighbour(struct rtable *rt) | ||
1036 | { | ||
1037 | struct neighbour *n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway); | ||
1038 | if (IS_ERR(n)) | ||
1039 | return PTR_ERR(n); | ||
1040 | dst_set_neighbour(&rt->dst, n); | ||
1041 | |||
1042 | return 0; | ||
1043 | } | ||
1044 | |||
1009 | static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt, | 1045 | static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt, |
1010 | struct sk_buff *skb, int ifindex) | 1046 | struct sk_buff *skb, int ifindex) |
1011 | { | 1047 | { |
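
ipv4_neigh_lookup() follows the usual neighbour-table pattern: a fast hash probe first (__ipv4_neigh_lookup(), which presumably takes a reference on a hit), then neigh_create() to allocate and insert on a miss. Two details in the new code worth annotating:

    /* ATM CLIP keeps its own neighbour table, hence the hook */
    if (dev->type == ARPHRD_ATM)
            tbl = clip_tbl_hook;

    /* loopback and point-to-point links have one fixed peer, so all
     * destinations can share a single entry keyed by INADDR_ANY */
    if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
            pkey = &inaddr_any;
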
@@ -1042,7 +1078,7 @@ restart: | |||
1042 | 1078 | ||
1043 | rt->dst.flags |= DST_NOCACHE; | 1079 | rt->dst.flags |= DST_NOCACHE; |
1044 | if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) { | 1080 | if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) { |
1045 | int err = arp_bind_neighbour(&rt->dst); | 1081 | int err = rt_bind_neighbour(rt); |
1046 | if (err) { | 1082 | if (err) { |
1047 | if (net_ratelimit()) | 1083 | if (net_ratelimit()) |
1048 | printk(KERN_WARNING | 1084 | printk(KERN_WARNING |
@@ -1138,7 +1174,7 @@ restart: | |||
1138 | route or unicast forwarding path. | 1174 | route or unicast forwarding path. |
1139 | */ | 1175 | */ |
1140 | if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) { | 1176 | if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) { |
1141 | int err = arp_bind_neighbour(&rt->dst); | 1177 | int err = rt_bind_neighbour(rt); |
1142 | if (err) { | 1178 | if (err) { |
1143 | spin_unlock_bh(rt_hash_lock_addr(hash)); | 1179 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
1144 | 1180 | ||
@@ -1439,20 +1475,20 @@ static int ip_error(struct sk_buff *skb) | |||
1439 | int code; | 1475 | int code; |
1440 | 1476 | ||
1441 | switch (rt->dst.error) { | 1477 | switch (rt->dst.error) { |
1442 | case EINVAL: | 1478 | case EINVAL: |
1443 | default: | 1479 | default: |
1444 | goto out; | 1480 | goto out; |
1445 | case EHOSTUNREACH: | 1481 | case EHOSTUNREACH: |
1446 | code = ICMP_HOST_UNREACH; | 1482 | code = ICMP_HOST_UNREACH; |
1447 | break; | 1483 | break; |
1448 | case ENETUNREACH: | 1484 | case ENETUNREACH: |
1449 | code = ICMP_NET_UNREACH; | 1485 | code = ICMP_NET_UNREACH; |
1450 | IP_INC_STATS_BH(dev_net(rt->dst.dev), | 1486 | IP_INC_STATS_BH(dev_net(rt->dst.dev), |
1451 | IPSTATS_MIB_INNOROUTES); | 1487 | IPSTATS_MIB_INNOROUTES); |
1452 | break; | 1488 | break; |
1453 | case EACCES: | 1489 | case EACCES: |
1454 | code = ICMP_PKT_FILTERED; | 1490 | code = ICMP_PKT_FILTERED; |
1455 | break; | 1491 | break; |
1456 | } | 1492 | } |
1457 | 1493 | ||
1458 | if (!rt->peer) | 1494 | if (!rt->peer) |
@@ -1592,23 +1628,24 @@ static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) | |||
1592 | { | 1628 | { |
1593 | struct rtable *rt = (struct rtable *) dst; | 1629 | struct rtable *rt = (struct rtable *) dst; |
1594 | __be32 orig_gw = rt->rt_gateway; | 1630 | __be32 orig_gw = rt->rt_gateway; |
1631 | struct neighbour *n; | ||
1595 | 1632 | ||
1596 | dst_confirm(&rt->dst); | 1633 | dst_confirm(&rt->dst); |
1597 | 1634 | ||
1598 | neigh_release(rt->dst.neighbour); | 1635 | neigh_release(dst_get_neighbour(&rt->dst)); |
1599 | rt->dst.neighbour = NULL; | 1636 | dst_set_neighbour(&rt->dst, NULL); |
1600 | 1637 | ||
1601 | rt->rt_gateway = peer->redirect_learned.a4; | 1638 | rt->rt_gateway = peer->redirect_learned.a4; |
1602 | if (arp_bind_neighbour(&rt->dst) || | 1639 | rt_bind_neighbour(rt); |
1603 | !(rt->dst.neighbour->nud_state & NUD_VALID)) { | 1640 | n = dst_get_neighbour(&rt->dst); |
1604 | if (rt->dst.neighbour) | 1641 | if (!n || !(n->nud_state & NUD_VALID)) { |
1605 | neigh_event_send(rt->dst.neighbour, NULL); | 1642 | if (n) |
1643 | neigh_event_send(n, NULL); | ||
1606 | rt->rt_gateway = orig_gw; | 1644 | rt->rt_gateway = orig_gw; |
1607 | return -EAGAIN; | 1645 | return -EAGAIN; |
1608 | } else { | 1646 | } else { |
1609 | rt->rt_flags |= RTCF_REDIRECTED; | 1647 | rt->rt_flags |= RTCF_REDIRECTED; |
1610 | call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, | 1648 | call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); |
1611 | rt->dst.neighbour); | ||
1612 | } | 1649 | } |
1613 | return 0; | 1650 | return 0; |
1614 | } | 1651 | } |
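
dst_get_neighbour()/dst_set_neighbour() replace raw rt->dst.neighbour dereferences throughout this hunk. Their presumed shape at this point in the series is a pair of trivial wrappers, introduced so the pointer's storage and locking can change behind the API later; whether the field itself gets renamed in the process is not visible in this diff:

    static inline struct neighbour *dst_get_neighbour(struct dst_entry *dst)
    {
            return dst->neighbour;
    }

    static inline void dst_set_neighbour(struct dst_entry *dst,
                                         struct neighbour *neigh)
    {
            dst->neighbour = neigh;
    }
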
@@ -2708,6 +2745,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { | |||
2708 | .default_advmss = ipv4_default_advmss, | 2745 | .default_advmss = ipv4_default_advmss, |
2709 | .update_pmtu = ipv4_rt_blackhole_update_pmtu, | 2746 | .update_pmtu = ipv4_rt_blackhole_update_pmtu, |
2710 | .cow_metrics = ipv4_rt_blackhole_cow_metrics, | 2747 | .cow_metrics = ipv4_rt_blackhole_cow_metrics, |
2748 | .neigh_lookup = ipv4_neigh_lookup, | ||
2711 | }; | 2749 | }; |
2712 | 2750 | ||
2713 | struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) | 2751 | struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) |
@@ -3303,7 +3341,7 @@ int __init ip_rt_init(void) | |||
3303 | xfrm_init(); | 3341 | xfrm_init(); |
3304 | xfrm4_init(ip_rt_max_size); | 3342 | xfrm4_init(ip_rt_max_size); |
3305 | #endif | 3343 | #endif |
3306 | rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL); | 3344 | rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL); |
3307 | 3345 | ||
3308 | #ifdef CONFIG_SYSCTL | 3346 | #ifdef CONFIG_SYSCTL |
3309 | register_pernet_subsys(&sysctl_route_ops); | 3347 | register_pernet_subsys(&sysctl_route_ops); |
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 26461492a84..92bb9434b33 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c | |||
@@ -316,6 +316,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
316 | ireq->wscale_ok = tcp_opt.wscale_ok; | 316 | ireq->wscale_ok = tcp_opt.wscale_ok; |
317 | ireq->tstamp_ok = tcp_opt.saw_tstamp; | 317 | ireq->tstamp_ok = tcp_opt.saw_tstamp; |
318 | req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; | 318 | req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; |
319 | treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; | ||
319 | 320 | ||
320 | /* We threw the options of the initial SYN away, so we hope | 321 | * the ACK carries the same options again (see RFC1122 4.2.3.8) |
321 | * the ACK carries the same options again (see RFC1122 4.2.3.8) | 322 | * the ACK carries the same options again (see RFC1122 4.2.3.8) |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 57d0752e239..69fd7201129 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -398,20 +398,6 @@ static struct ctl_table ipv4_table[] = { | |||
398 | .proc_handler = proc_dointvec_jiffies, | 398 | .proc_handler = proc_dointvec_jiffies, |
399 | }, | 399 | }, |
400 | { | 400 | { |
401 | .procname = "inet_peer_gc_mintime", | ||
402 | .data = &inet_peer_gc_mintime, | ||
403 | .maxlen = sizeof(int), | ||
404 | .mode = 0644, | ||
405 | .proc_handler = proc_dointvec_jiffies, | ||
406 | }, | ||
407 | { | ||
408 | .procname = "inet_peer_gc_maxtime", | ||
409 | .data = &inet_peer_gc_maxtime, | ||
410 | .maxlen = sizeof(int), | ||
411 | .mode = 0644, | ||
412 | .proc_handler = proc_dointvec_jiffies, | ||
413 | }, | ||
414 | { | ||
415 | .procname = "tcp_orphan_retries", | 401 | .procname = "tcp_orphan_retries", |
416 | .data = &sysctl_tcp_orphan_retries, | 402 | .data = &sysctl_tcp_orphan_retries, |
417 | .maxlen = sizeof(int), | 403 | .maxlen = sizeof(int), |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 054a59d21eb..46febcacb72 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -3220,7 +3220,7 @@ __setup("thash_entries=", set_thash_entries); | |||
3220 | void __init tcp_init(void) | 3220 | void __init tcp_init(void) |
3221 | { | 3221 | { |
3222 | struct sk_buff *skb = NULL; | 3222 | struct sk_buff *skb = NULL; |
3223 | unsigned long nr_pages, limit; | 3223 | unsigned long limit; |
3224 | int i, max_share, cnt; | 3224 | int i, max_share, cnt; |
3225 | unsigned long jiffy = jiffies; | 3225 | unsigned long jiffy = jiffies; |
3226 | 3226 | ||
@@ -3277,13 +3277,7 @@ void __init tcp_init(void) | |||
3277 | sysctl_tcp_max_orphans = cnt / 2; | 3277 | sysctl_tcp_max_orphans = cnt / 2; |
3278 | sysctl_max_syn_backlog = max(128, cnt / 256); | 3278 | sysctl_max_syn_backlog = max(128, cnt / 256); |
3279 | 3279 | ||
3280 | /* Set the pressure threshold to be a fraction of global memory that | 3280 | limit = nr_free_buffer_pages() / 8; |
3281 | * is up to 1/2 at 256 MB, decreasing toward zero with the amount of | ||
3282 | * memory, with a floor of 128 pages. | ||
3283 | */ | ||
3284 | nr_pages = totalram_pages - totalhigh_pages; | ||
3285 | limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); | ||
3286 | limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); | ||
3287 | limit = max(limit, 128UL); | 3281 | limit = max(limit, 128UL); |
3288 | sysctl_tcp_mem[0] = limit / 4 * 3; | 3282 | sysctl_tcp_mem[0] = limit / 4 * 3; |
3289 | sysctl_tcp_mem[1] = limit; | 3283 | sysctl_tcp_mem[1] = limit; |
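
The replacement bases TCP's memory-pressure thresholds on free lowmem at init time instead of the old two-step scaling formula. Illustrative numbers, assuming nr_free_buffer_pages() returns 1 << 20 (about 4 GiB of free lowmem with 4 KiB pages):

    limit = (1UL << 20) / 8;           /* 131072 pages            */
    limit = max(limit, 128UL);         /* floor remains 128 pages */
    sysctl_tcp_mem[0] = limit / 4 * 3; /*  98304 pages, ~384 MiB  */
    sysctl_tcp_mem[1] = limit;         /* 131072 pages, ~512 MiB  */
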
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bef9f04c22b..ea0d2183df4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -880,6 +880,11 @@ static void tcp_init_metrics(struct sock *sk) | |||
880 | tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH); | 880 | tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH); |
881 | if (tp->snd_ssthresh > tp->snd_cwnd_clamp) | 881 | if (tp->snd_ssthresh > tp->snd_cwnd_clamp) |
882 | tp->snd_ssthresh = tp->snd_cwnd_clamp; | 882 | tp->snd_ssthresh = tp->snd_cwnd_clamp; |
883 | } else { | ||
884 | /* ssthresh may have been reduced unnecessarily during | ||
885 | * 3WHS. Restore it to its initial default. | ||
886 | */ | ||
887 | tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; | ||
883 | } | 888 | } |
884 | if (dst_metric(dst, RTAX_REORDERING) && | 889 | if (dst_metric(dst, RTAX_REORDERING) && |
885 | tp->reordering != dst_metric(dst, RTAX_REORDERING)) { | 890 | tp->reordering != dst_metric(dst, RTAX_REORDERING)) { |
@@ -887,10 +892,7 @@ static void tcp_init_metrics(struct sock *sk) | |||
887 | tp->reordering = dst_metric(dst, RTAX_REORDERING); | 892 | tp->reordering = dst_metric(dst, RTAX_REORDERING); |
888 | } | 893 | } |
889 | 894 | ||
890 | if (dst_metric(dst, RTAX_RTT) == 0) | 895 | if (dst_metric(dst, RTAX_RTT) == 0 || tp->srtt == 0) |
891 | goto reset; | ||
892 | |||
893 | if (!tp->srtt && dst_metric_rtt(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3)) | ||
894 | goto reset; | 896 | goto reset; |
895 | 897 | ||
896 | /* Initial rtt is determined from SYN,SYN-ACK. | 898 | /* Initial rtt is determined from SYN,SYN-ACK. |
@@ -916,19 +918,26 @@ static void tcp_init_metrics(struct sock *sk) | |||
916 | tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); | 918 | tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); |
917 | } | 919 | } |
918 | tcp_set_rto(sk); | 920 | tcp_set_rto(sk); |
919 | if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) { | ||
920 | reset: | 921 | reset: |
921 | /* Play conservative. If timestamps are not | 922 | if (tp->srtt == 0) { |
922 | * supported, TCP will fail to recalculate correct | 923 | /* RFC2988bis: We've failed to get a valid RTT sample from |
923 | * rtt, if initial rto is too small. FORGET ALL AND RESET! | 924 | * 3WHS. This is most likely due to retransmission, |
925 | * including spurious ones. Reset the RTO back to 3secs | ||
926 | * from the more aggressive 1sec to avoid more spurious | ||
927 | * retransmissions. | ||
924 | */ | 928 | */ |
925 | if (!tp->rx_opt.saw_tstamp && tp->srtt) { | 929 | tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK; |
926 | tp->srtt = 0; | 930 | inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; |
927 | tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT; | ||
928 | inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; | ||
929 | } | ||
930 | } | 931 | } |
931 | tp->snd_cwnd = tcp_init_cwnd(tp, dst); | 932 | /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been |
933 | * retransmitted. In light of RFC2988bis' more aggressive 1sec | ||
934 | * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK | ||
935 | * retransmission has occurred. | ||
936 | */ | ||
937 | if (tp->total_retrans > 1) | ||
938 | tp->snd_cwnd = 1; | ||
939 | else | ||
940 | tp->snd_cwnd = tcp_init_cwnd(tp, dst); | ||
932 | tp->snd_cwnd_stamp = tcp_time_stamp; | 941 | tp->snd_cwnd_stamp = tcp_time_stamp; |
933 | } | 942 | } |
934 | 943 | ||
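
"RFC2988bis" is the draft that became RFC 6298: the initial RTO drops from 3s to 1s, and the old 3s value survives only as a fallback for handshakes that yielded no usable RTT sample. Presumed companion defines from this series (include/net/tcp.h):

    #define TCP_TIMEOUT_INIT     ((unsigned)(1*HZ)) /* RFC2988bis initial RTO  */
    #define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ)) /* RTO if 3WHS gave no RTT */
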
@@ -3112,12 +3121,13 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) | |||
3112 | tcp_xmit_retransmit_queue(sk); | 3121 | tcp_xmit_retransmit_queue(sk); |
3113 | } | 3122 | } |
3114 | 3123 | ||
3115 | static void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt) | 3124 | void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt) |
3116 | { | 3125 | { |
3117 | tcp_rtt_estimator(sk, seq_rtt); | 3126 | tcp_rtt_estimator(sk, seq_rtt); |
3118 | tcp_set_rto(sk); | 3127 | tcp_set_rto(sk); |
3119 | inet_csk(sk)->icsk_backoff = 0; | 3128 | inet_csk(sk)->icsk_backoff = 0; |
3120 | } | 3129 | } |
3130 | EXPORT_SYMBOL(tcp_valid_rtt_meas); | ||
3121 | 3131 | ||
3122 | /* Read draft-ietf-tcplw-high-performance before mucking | 3132 | /* Read draft-ietf-tcplw-high-performance before mucking |
3123 | * with this code. (Supersedes RFC1323) | 3133 | * with this code. (Supersedes RFC1323) |
@@ -5806,12 +5816,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
5806 | tp->rx_opt.snd_wscale; | 5816 | tp->rx_opt.snd_wscale; |
5807 | tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); | 5817 | tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); |
5808 | 5818 | ||
5809 | /* tcp_ack considers this ACK as duplicate | ||
5810 | * and does not calculate rtt. | ||
5811 | * Force it here. | ||
5812 | */ | ||
5813 | tcp_ack_update_rtt(sk, 0, 0); | ||
5814 | |||
5815 | if (tp->rx_opt.tstamp_ok) | 5819 | if (tp->rx_opt.tstamp_ok) |
5816 | tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; | 5820 | tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; |
5817 | 5821 | ||
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 708dc203b03..955b8e65b69 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -429,8 +429,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) | |||
429 | break; | 429 | break; |
430 | 430 | ||
431 | icsk->icsk_backoff--; | 431 | icsk->icsk_backoff--; |
432 | inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) << | 432 | inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) : |
433 | icsk->icsk_backoff; | 433 | TCP_TIMEOUT_INIT) << icsk->icsk_backoff; |
434 | tcp_bound_rto(sk); | 434 | tcp_bound_rto(sk); |
435 | 435 | ||
436 | skb = tcp_write_queue_head(sk); | 436 | skb = tcp_write_queue_head(sk); |
@@ -1384,6 +1384,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1384 | isn = tcp_v4_init_sequence(skb); | 1384 | isn = tcp_v4_init_sequence(skb); |
1385 | } | 1385 | } |
1386 | tcp_rsk(req)->snt_isn = isn; | 1386 | tcp_rsk(req)->snt_isn = isn; |
1387 | tcp_rsk(req)->snt_synack = tcp_time_stamp; | ||
1387 | 1388 | ||
1388 | if (tcp_v4_send_synack(sk, dst, req, | 1389 | if (tcp_v4_send_synack(sk, dst, req, |
1389 | (struct request_values *)&tmp_ext) || | 1390 | (struct request_values *)&tmp_ext) || |
@@ -1458,6 +1459,10 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1458 | newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; | 1459 | newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; |
1459 | 1460 | ||
1460 | tcp_initialize_rcv_mss(newsk); | 1461 | tcp_initialize_rcv_mss(newsk); |
1462 | if (tcp_rsk(req)->snt_synack) | ||
1463 | tcp_valid_rtt_meas(newsk, | ||
1464 | tcp_time_stamp - tcp_rsk(req)->snt_synack); | ||
1465 | newtp->total_retrans = req->retrans; | ||
1461 | 1466 | ||
1462 | #ifdef CONFIG_TCP_MD5SIG | 1467 | #ifdef CONFIG_TCP_MD5SIG |
1463 | /* Copy over the MD5 key from the original socket */ | 1468 | /* Copy over the MD5 key from the original socket */ |
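
snt_synack is stamped when the SYN-ACK is sent (above), and recovered from the echoed timestamp for syncookies (syncookies.c hunk) and in tcp_check_req() (tcp_minisocks.c hunk below); the ACK completing the handshake then supplies the passive side's first RTT sample. In effect:

    /* sketch: one RTT sample per accepted connection, in jiffies;
     * synack_rtt is an illustrative name, not from the diff */
    u32 synack_rtt = tcp_time_stamp - tcp_rsk(req)->snt_synack;
    tcp_valid_rtt_meas(newsk, synack_rtt);
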
@@ -1855,7 +1860,7 @@ static int tcp_v4_init_sock(struct sock *sk) | |||
1855 | * algorithms that we must have the following bandaid to talk | 1860 | * algorithms that we must have the following bandaid to talk |
1856 | * efficiently to them. -DaveM | 1861 | * efficiently to them. -DaveM |
1857 | */ | 1862 | */ |
1858 | tp->snd_cwnd = 2; | 1863 | tp->snd_cwnd = TCP_INIT_CWND; |
1859 | 1864 | ||
1860 | /* See draft-stevens-tcpca-spec-01 for discussion of the | 1865 | /* See draft-stevens-tcpca-spec-01 for discussion of the |
1861 | * initialization of these values. | 1866 | * initialization of these values. |
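
TCP_INIT_CWND names the initial congestion window (10 segments in this era, per the IW10 proposal) and replaces the historic literal 2 here and in tcp_create_openreq_child() below:

    #define TCP_INIT_CWND   10      /* initial cwnd, in segments */
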
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 80b1f80759a..d2fe4e06b47 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -486,7 +486,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
486 | * algorithms that we must have the following bandaid to talk | 486 | * algorithms that we must have the following bandaid to talk |
487 | * efficiently to them. -DaveM | 487 | * efficiently to them. -DaveM |
488 | */ | 488 | */ |
489 | newtp->snd_cwnd = 2; | 489 | newtp->snd_cwnd = TCP_INIT_CWND; |
490 | newtp->snd_cwnd_cnt = 0; | 490 | newtp->snd_cwnd_cnt = 0; |
491 | newtp->bytes_acked = 0; | 491 | newtp->bytes_acked = 0; |
492 | 492 | ||
@@ -720,6 +720,10 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | |||
720 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); | 720 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); |
721 | return NULL; | 721 | return NULL; |
722 | } | 722 | } |
723 | if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr) | ||
724 | tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr; | ||
725 | else if (req->retrans) /* don't take RTT sample if retrans && ~TS */ | ||
726 | tcp_rsk(req)->snt_synack = 0; | ||
723 | 727 | ||
724 | /* OK, ACK is valid, create big socket and | 728 | /* OK, ACK is valid, create big socket and |
725 | * feed this segment to it. It will repeat all | 729 | * feed this segment to it. It will repeat all |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index abca870d8ff..1b5a19340a9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -105,6 +105,7 @@ | |||
105 | #include <net/route.h> | 105 | #include <net/route.h> |
106 | #include <net/checksum.h> | 106 | #include <net/checksum.h> |
107 | #include <net/xfrm.h> | 107 | #include <net/xfrm.h> |
108 | #include <trace/events/udp.h> | ||
108 | #include "udp_impl.h" | 109 | #include "udp_impl.h" |
109 | 110 | ||
110 | struct udp_table udp_table __read_mostly; | 111 | struct udp_table udp_table __read_mostly; |
@@ -1249,6 +1250,9 @@ csum_copy_err: | |||
1249 | 1250 | ||
1250 | if (noblock) | 1251 | if (noblock) |
1251 | return -EAGAIN; | 1252 | return -EAGAIN; |
1253 | |||
1254 | /* starting over for a new packet */ | ||
1255 | msg->msg_flags &= ~MSG_TRUNC; | ||
1252 | goto try_again; | 1256 | goto try_again; |
1253 | } | 1257 | } |
1254 | 1258 | ||
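
msg_flags accumulates across the checksum-error retry, so without the reset a truncated bad datagram could leave MSG_TRUNC set when the following good one is returned. A hypothetical receiver relying on the fixed behaviour:

    /* hypothetical application code: MSG_TRUNC must describe only the
     * datagram actually delivered by this recvmsg() call */
    char buf[2048];
    struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
    struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1 };
    ssize_t n = recvmsg(fd, &msg, 0);

    if (n >= 0 && (msg.msg_flags & MSG_TRUNC))
            fprintf(stderr, "datagram truncated to %zd bytes\n", n);
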
@@ -1363,6 +1367,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
1363 | is_udplite); | 1367 | is_udplite); |
1364 | UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); | 1368 | UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); |
1365 | kfree_skb(skb); | 1369 | kfree_skb(skb); |
1370 | trace_udp_fail_queue_rcv_skb(rc, sk); | ||
1366 | return -1; | 1371 | return -1; |
1367 | } | 1372 | } |
1368 | 1373 | ||
@@ -2206,16 +2211,10 @@ void __init udp_table_init(struct udp_table *table, const char *name) | |||
2206 | 2211 | ||
2207 | void __init udp_init(void) | 2212 | void __init udp_init(void) |
2208 | { | 2213 | { |
2209 | unsigned long nr_pages, limit; | 2214 | unsigned long limit; |
2210 | 2215 | ||
2211 | udp_table_init(&udp_table, "UDP"); | 2216 | udp_table_init(&udp_table, "UDP"); |
2212 | /* Set the pressure threshold up by the same strategy of TCP. It is a | 2217 | limit = nr_free_buffer_pages() / 8; |
2213 | * fraction of global memory that is up to 1/2 at 256 MB, decreasing | ||
2214 | * toward zero with the amount of memory, with a floor of 128 pages. | ||
2215 | */ | ||
2216 | nr_pages = totalram_pages - totalhigh_pages; | ||
2217 | limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); | ||
2218 | limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); | ||
2219 | limit = max(limit, 128UL); | 2218 | limit = max(limit, 128UL); |
2220 | sysctl_udp_mem[0] = limit / 4 * 3; | 2219 | sysctl_udp_mem[0] = limit / 4 * 3; |
2221 | sysctl_udp_mem[1] = limit; | 2220 | sysctl_udp_mem[1] = limit; |
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 2d51840e53a..327a617d594 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c | |||
@@ -32,7 +32,12 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) | |||
32 | dst = skb_dst(skb); | 32 | dst = skb_dst(skb); |
33 | mtu = dst_mtu(dst); | 33 | mtu = dst_mtu(dst); |
34 | if (skb->len > mtu) { | 34 | if (skb->len > mtu) { |
35 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); | 35 | if (skb->sk) |
36 | ip_local_error(skb->sk, EMSGSIZE, ip_hdr(skb)->daddr, | ||
37 | inet_sk(skb->sk)->inet_dport, mtu); | ||
38 | else | ||
39 | icmp_send(skb, ICMP_DEST_UNREACH, | ||
40 | ICMP_FRAG_NEEDED, htonl(mtu)); | ||
36 | ret = -EMSGSIZE; | 41 | ret = -EMSGSIZE; |
37 | } | 42 | } |
38 | out: | 43 | out: |
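
For oversized locally generated packets the sender is now told directly: ip_local_error() queues EMSGSIZE (carrying the MTU) on the socket's error queue, instead of the host sending an ICMP FRAG_NEEDED to itself. The existing helper's prototype, for reference:

    void ip_local_error(struct sock *sk, int err, __be32 daddr,
                        __be16 dport, u32 info);
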
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 981e43eaf70..fc5368ad2b0 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c | |||
@@ -117,7 +117,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) | |||
117 | memset(fl4, 0, sizeof(struct flowi4)); | 117 | memset(fl4, 0, sizeof(struct flowi4)); |
118 | fl4->flowi4_mark = skb->mark; | 118 | fl4->flowi4_mark = skb->mark; |
119 | 119 | ||
120 | if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { | 120 | if (!ip_is_fragment(iph)) { |
121 | switch (iph->protocol) { | 121 | switch (iph->protocol) { |
122 | case IPPROTO_UDP: | 122 | case IPPROTO_UDP: |
123 | case IPPROTO_UDPLITE: | 123 | case IPPROTO_UDPLITE: |