about | summary | refs | log | tree | commit | diff | stats
path: root/net/ipv4
diff options
context:
space:
mode:
author Ingo Molnar <mingo@elte.hu> 2008-06-16 05:20:57 -0400
committer Ingo Molnar <mingo@elte.hu> 2008-06-16 05:20:57 -0400
commit fd2c17e1777d46cff14c25ea774a4d17459d188a (patch)
tree e8299216de8e99418195ba64bcf8f679df3a66b3 /net/ipv4
parent 74e411cb6443d8bcb55fbe89fcc7a9ee574df91b (diff)
parent 066519068ad2fbe98c7f45552b1f592903a9c8c8 (diff)
Merge branch 'linus' into x86/timers
Diffstat (limited to 'net/ipv4')
-rw-r--r-- net/ipv4/arp.c 7
-rw-r--r-- net/ipv4/cipso_ipv4.c 4
-rw-r--r-- net/ipv4/devinet.c 9
-rw-r--r-- net/ipv4/fib_frontend.c 1
-rw-r--r-- net/ipv4/fib_semantics.c 5
-rw-r--r-- net/ipv4/igmp.c 4
-rw-r--r-- net/ipv4/inet_connection_sock.c 11
-rw-r--r-- net/ipv4/ip_gre.c 146
-rw-r--r-- net/ipv4/ipconfig.c 6
-rw-r--r-- net/ipv4/ipip.c 130
-rw-r--r-- net/ipv4/netfilter/nf_nat_snmp_basic.c 14
-rw-r--r-- net/ipv4/raw.c 19
-rw-r--r-- net/ipv4/route.c 4
-rw-r--r-- net/ipv4/syncookies.c 3
-rw-r--r-- net/ipv4/tcp.c 27
-rw-r--r-- net/ipv4/tcp_input.c 97
-rw-r--r-- net/ipv4/tcp_ipv4.c 10
-rw-r--r-- net/ipv4/tcp_minisocks.c 32
-rw-r--r-- net/ipv4/tcp_output.c 12
-rw-r--r-- net/ipv4/tcp_timer.c 5
-rw-r--r-- net/ipv4/tunnel4.c 2
-rw-r--r-- net/ipv4/udp.c 3
22 files changed, 131 insertions, 420 deletions
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 68b72a7a180..9b539fa9fe1 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -570,7 +570,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
570 * Allocate a buffer 570 * Allocate a buffer
571 */ 571 */
572 572
573 skb = alloc_skb(arp_hdr_len(dev) + LL_RESERVED_SPACE(dev), GFP_ATOMIC); 573 skb = alloc_skb(arp_hdr_len(dev) + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
574 if (skb == NULL) 574 if (skb == NULL)
575 return NULL; 575 return NULL;
576 576
@@ -1288,7 +1288,6 @@ static void arp_format_neigh_entry(struct seq_file *seq,
1288 struct neighbour *n) 1288 struct neighbour *n)
1289{ 1289{
1290 char hbuffer[HBUFFERLEN]; 1290 char hbuffer[HBUFFERLEN];
1291 const char hexbuf[] = "0123456789ABCDEF";
1292 int k, j; 1291 int k, j;
1293 char tbuf[16]; 1292 char tbuf[16];
1294 struct net_device *dev = n->dev; 1293 struct net_device *dev = n->dev;
@@ -1302,8 +1301,8 @@ static void arp_format_neigh_entry(struct seq_file *seq,
1302 else { 1301 else {
1303#endif 1302#endif
1304 for (k = 0, j = 0; k < HBUFFERLEN - 3 && j < dev->addr_len; j++) { 1303 for (k = 0, j = 0; k < HBUFFERLEN - 3 && j < dev->addr_len; j++) {
1305 hbuffer[k++] = hexbuf[(n->ha[j] >> 4) & 15]; 1304 hbuffer[k++] = hex_asc_hi(n->ha[j]);
1306 hbuffer[k++] = hexbuf[n->ha[j] & 15]; 1305 hbuffer[k++] = hex_asc_lo(n->ha[j]);
1307 hbuffer[k++] = ':'; 1306 hbuffer[k++] = ':';
1308 } 1307 }
1309 hbuffer[--k] = 0; 1308 hbuffer[--k] = 0;
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 05afb576d93..2c0e4572cc9 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -338,7 +338,7 @@ static int cipso_v4_cache_check(const unsigned char *key,
338 return -ENOENT; 338 return -ENOENT;
339 339
340 hash = cipso_v4_map_cache_hash(key, key_len); 340 hash = cipso_v4_map_cache_hash(key, key_len);
341 bkt = hash & (CIPSO_V4_CACHE_BUCKETBITS - 1); 341 bkt = hash & (CIPSO_V4_CACHE_BUCKETS - 1);
342 spin_lock_bh(&cipso_v4_cache[bkt].lock); 342 spin_lock_bh(&cipso_v4_cache[bkt].lock);
343 list_for_each_entry(entry, &cipso_v4_cache[bkt].list, list) { 343 list_for_each_entry(entry, &cipso_v4_cache[bkt].list, list) {
344 if (entry->hash == hash && 344 if (entry->hash == hash &&
@@ -417,7 +417,7 @@ int cipso_v4_cache_add(const struct sk_buff *skb,
417 atomic_inc(&secattr->cache->refcount); 417 atomic_inc(&secattr->cache->refcount);
418 entry->lsm_data = secattr->cache; 418 entry->lsm_data = secattr->cache;
419 419
420 bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETBITS - 1); 420 bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1);
421 spin_lock_bh(&cipso_v4_cache[bkt].lock); 421 spin_lock_bh(&cipso_v4_cache[bkt].lock);
422 if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) { 422 if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) {
423 list_add(&entry->list, &cipso_v4_cache[bkt].list); 423 list_add(&entry->list, &cipso_v4_cache[bkt].list);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 6848e4760f3..79a7ef6209f 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -90,7 +90,6 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
90 [IFA_LOCAL] = { .type = NLA_U32 }, 90 [IFA_LOCAL] = { .type = NLA_U32 },
91 [IFA_ADDRESS] = { .type = NLA_U32 }, 91 [IFA_ADDRESS] = { .type = NLA_U32 },
92 [IFA_BROADCAST] = { .type = NLA_U32 }, 92 [IFA_BROADCAST] = { .type = NLA_U32 },
93 [IFA_ANYCAST] = { .type = NLA_U32 },
94 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, 93 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
95}; 94};
96 95
@@ -536,9 +535,6 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
536 if (tb[IFA_BROADCAST]) 535 if (tb[IFA_BROADCAST])
537 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]); 536 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
538 537
539 if (tb[IFA_ANYCAST])
540 ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
541
542 if (tb[IFA_LABEL]) 538 if (tb[IFA_LABEL])
543 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ); 539 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
544 else 540 else
@@ -745,7 +741,6 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
745 break; 741 break;
746 inet_del_ifa(in_dev, ifap, 0); 742 inet_del_ifa(in_dev, ifap, 0);
747 ifa->ifa_broadcast = 0; 743 ifa->ifa_broadcast = 0;
748 ifa->ifa_anycast = 0;
749 ifa->ifa_scope = 0; 744 ifa->ifa_scope = 0;
750 } 745 }
751 746
@@ -1113,7 +1108,6 @@ static inline size_t inet_nlmsg_size(void)
1113 + nla_total_size(4) /* IFA_ADDRESS */ 1108 + nla_total_size(4) /* IFA_ADDRESS */
1114 + nla_total_size(4) /* IFA_LOCAL */ 1109 + nla_total_size(4) /* IFA_LOCAL */
1115 + nla_total_size(4) /* IFA_BROADCAST */ 1110 + nla_total_size(4) /* IFA_BROADCAST */
1116 + nla_total_size(4) /* IFA_ANYCAST */
1117 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */ 1111 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1118} 1112}
1119 1113
@@ -1143,9 +1137,6 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1143 if (ifa->ifa_broadcast) 1137 if (ifa->ifa_broadcast)
1144 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast); 1138 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1145 1139
1146 if (ifa->ifa_anycast)
1147 NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1148
1149 if (ifa->ifa_label[0]) 1140 if (ifa->ifa_label[0])
1150 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label); 1141 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1151 1142
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 0f1557a4ac7..0b2ac6a3d90 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -506,7 +506,6 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
506 [RTA_PREFSRC] = { .type = NLA_U32 }, 506 [RTA_PREFSRC] = { .type = NLA_U32 },
507 [RTA_METRICS] = { .type = NLA_NESTED }, 507 [RTA_METRICS] = { .type = NLA_NESTED },
508 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, 508 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
509 [RTA_PROTOINFO] = { .type = NLA_U32 },
510 [RTA_FLOW] = { .type = NLA_U32 }, 509 [RTA_FLOW] = { .type = NLA_U32 },
511}; 510};
512 511
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 3b83c34019f..0d4d72827e4 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -960,7 +960,10 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
960 rtm->rtm_dst_len = dst_len; 960 rtm->rtm_dst_len = dst_len;
961 rtm->rtm_src_len = 0; 961 rtm->rtm_src_len = 0;
962 rtm->rtm_tos = tos; 962 rtm->rtm_tos = tos;
963 rtm->rtm_table = tb_id; 963 if (tb_id < 256)
964 rtm->rtm_table = tb_id;
965 else
966 rtm->rtm_table = RT_TABLE_COMPAT;
964 NLA_PUT_U32(skb, RTA_TABLE, tb_id); 967 NLA_PUT_U32(skb, RTA_TABLE, tb_id);
965 rtm->rtm_type = type; 968 rtm->rtm_type = type;
966 rtm->rtm_flags = fi->fib_flags; 969 rtm->rtm_flags = fi->fib_flags;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 6250f4239b6..2769dc4a4c8 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -292,7 +292,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
292 struct iphdr *pip; 292 struct iphdr *pip;
293 struct igmpv3_report *pig; 293 struct igmpv3_report *pig;
294 294
295 skb = alloc_skb(size + LL_RESERVED_SPACE(dev), GFP_ATOMIC); 295 skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
296 if (skb == NULL) 296 if (skb == NULL)
297 return NULL; 297 return NULL;
298 298
@@ -653,7 +653,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
653 return -1; 653 return -1;
654 } 654 }
655 655
656 skb=alloc_skb(IGMP_SIZE+LL_RESERVED_SPACE(dev), GFP_ATOMIC); 656 skb=alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
657 if (skb == NULL) { 657 if (skb == NULL) {
658 ip_rt_put(rt); 658 ip_rt_put(rt);
659 return -1; 659 return -1;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 828ea211ff2..045e799d3e1 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -419,7 +419,8 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
419 struct inet_connection_sock *icsk = inet_csk(parent); 419 struct inet_connection_sock *icsk = inet_csk(parent);
420 struct request_sock_queue *queue = &icsk->icsk_accept_queue; 420 struct request_sock_queue *queue = &icsk->icsk_accept_queue;
421 struct listen_sock *lopt = queue->listen_opt; 421 struct listen_sock *lopt = queue->listen_opt;
422 int thresh = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; 422 int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
423 int thresh = max_retries;
423 unsigned long now = jiffies; 424 unsigned long now = jiffies;
424 struct request_sock **reqp, *req; 425 struct request_sock **reqp, *req;
425 int i, budget; 426 int i, budget;
@@ -455,6 +456,9 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
455 } 456 }
456 } 457 }
457 458
459 if (queue->rskq_defer_accept)
460 max_retries = queue->rskq_defer_accept;
461
458 budget = 2 * (lopt->nr_table_entries / (timeout / interval)); 462 budget = 2 * (lopt->nr_table_entries / (timeout / interval));
459 i = lopt->clock_hand; 463 i = lopt->clock_hand;
460 464
@@ -462,8 +466,9 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
462 reqp=&lopt->syn_table[i]; 466 reqp=&lopt->syn_table[i];
463 while ((req = *reqp) != NULL) { 467 while ((req = *reqp) != NULL) {
464 if (time_after_eq(now, req->expires)) { 468 if (time_after_eq(now, req->expires)) {
465 if (req->retrans < thresh && 469 if ((req->retrans < (inet_rsk(req)->acked ? max_retries : thresh)) &&
466 !req->rsk_ops->rtx_syn_ack(parent, req)) { 470 (inet_rsk(req)->acked ||
471 !req->rsk_ops->rtx_syn_ack(parent, req))) {
467 unsigned long timeo; 472 unsigned long timeo;
468 473
469 if (req->retrans++ == 0) 474 if (req->retrans++ == 0)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 2ada033406d..4342cba4ff8 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -313,9 +313,8 @@ static void ipgre_tunnel_uninit(struct net_device *dev)
313 313
314static void ipgre_err(struct sk_buff *skb, u32 info) 314static void ipgre_err(struct sk_buff *skb, u32 info)
315{ 315{
316#ifndef I_WISH_WORLD_WERE_PERFECT
317 316
318/* It is not :-( All the routers (except for Linux) return only 317/* All the routers (except for Linux) return only
319 8 bytes of packet payload. It means, that precise relaying of 318 8 bytes of packet payload. It means, that precise relaying of
320 ICMP in the real Internet is absolutely infeasible. 319 ICMP in the real Internet is absolutely infeasible.
321 320
@@ -398,149 +397,6 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
398out: 397out:
399 read_unlock(&ipgre_lock); 398 read_unlock(&ipgre_lock);
400 return; 399 return;
401#else
402 struct iphdr *iph = (struct iphdr*)dp;
403 struct iphdr *eiph;
404 __be16 *p = (__be16*)(dp+(iph->ihl<<2));
405 const int type = icmp_hdr(skb)->type;
406 const int code = icmp_hdr(skb)->code;
407 int rel_type = 0;
408 int rel_code = 0;
409 __be32 rel_info = 0;
410 __u32 n = 0;
411 __be16 flags;
412 int grehlen = (iph->ihl<<2) + 4;
413 struct sk_buff *skb2;
414 struct flowi fl;
415 struct rtable *rt;
416
417 if (p[1] != htons(ETH_P_IP))
418 return;
419
420 flags = p[0];
421 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
422 if (flags&(GRE_VERSION|GRE_ROUTING))
423 return;
424 if (flags&GRE_CSUM)
425 grehlen += 4;
426 if (flags&GRE_KEY)
427 grehlen += 4;
428 if (flags&GRE_SEQ)
429 grehlen += 4;
430 }
431 if (len < grehlen + sizeof(struct iphdr))
432 return;
433 eiph = (struct iphdr*)(dp + grehlen);
434
435 switch (type) {
436 default:
437 return;
438 case ICMP_PARAMETERPROB:
439 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
440 if (n < (iph->ihl<<2))
441 return;
442
443 /* So... This guy found something strange INSIDE encapsulated
444 packet. Well, he is fool, but what can we do ?
445 */
446 rel_type = ICMP_PARAMETERPROB;
447 n -= grehlen;
448 rel_info = htonl(n << 24);
449 break;
450
451 case ICMP_DEST_UNREACH:
452 switch (code) {
453 case ICMP_SR_FAILED:
454 case ICMP_PORT_UNREACH:
455 /* Impossible event. */
456 return;
457 case ICMP_FRAG_NEEDED:
458 /* And it is the only really necessary thing :-) */
459 n = ntohs(icmp_hdr(skb)->un.frag.mtu);
460 if (n < grehlen+68)
461 return;
462 n -= grehlen;
463 /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
464 if (n > ntohs(eiph->tot_len))
465 return;
466 rel_info = htonl(n);
467 break;
468 default:
469 /* All others are translated to HOST_UNREACH.
470 rfc2003 contains "deep thoughts" about NET_UNREACH,
471 I believe, it is just ether pollution. --ANK
472 */
473 rel_type = ICMP_DEST_UNREACH;
474 rel_code = ICMP_HOST_UNREACH;
475 break;
476 }
477 break;
478 case ICMP_TIME_EXCEEDED:
479 if (code != ICMP_EXC_TTL)
480 return;
481 break;
482 }
483
484 /* Prepare fake skb to feed it to icmp_send */
485 skb2 = skb_clone(skb, GFP_ATOMIC);
486 if (skb2 == NULL)
487 return;
488 dst_release(skb2->dst);
489 skb2->dst = NULL;
490 skb_pull(skb2, skb->data - (u8*)eiph);
491 skb_reset_network_header(skb2);
492
493 /* Try to guess incoming interface */
494 memset(&fl, 0, sizeof(fl));
495 fl.fl4_dst = eiph->saddr;
496 fl.fl4_tos = RT_TOS(eiph->tos);
497 fl.proto = IPPROTO_GRE;
498 if (ip_route_output_key(dev_net(skb->dev), &rt, &fl)) {
499 kfree_skb(skb2);
500 return;
501 }
502 skb2->dev = rt->u.dst.dev;
503
504 /* route "incoming" packet */
505 if (rt->rt_flags&RTCF_LOCAL) {
506 ip_rt_put(rt);
507 rt = NULL;
508 fl.fl4_dst = eiph->daddr;
509 fl.fl4_src = eiph->saddr;
510 fl.fl4_tos = eiph->tos;
511 if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
512 rt->u.dst.dev->type != ARPHRD_IPGRE) {
513 ip_rt_put(rt);
514 kfree_skb(skb2);
515 return;
516 }
517 } else {
518 ip_rt_put(rt);
519 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
520 skb2->dst->dev->type != ARPHRD_IPGRE) {
521 kfree_skb(skb2);
522 return;
523 }
524 }
525
526 /* change mtu on this route */
527 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
528 if (n > dst_mtu(skb2->dst)) {
529 kfree_skb(skb2);
530 return;
531 }
532 skb2->dst->ops->update_pmtu(skb2->dst, n);
533 } else if (type == ICMP_TIME_EXCEEDED) {
534 struct ip_tunnel *t = netdev_priv(skb2->dev);
535 if (t->parms.iph.ttl) {
536 rel_type = ICMP_DEST_UNREACH;
537 rel_code = ICMP_HOST_UNREACH;
538 }
539 }
540
541 icmp_send(skb2, rel_type, rel_code, rel_info);
542 kfree_skb(skb2);
543#endif
544} 400}
545 401
546static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) 402static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 89dee4346f6..ed45037ce9b 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -710,14 +710,14 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
710 struct net_device *dev = d->dev; 710 struct net_device *dev = d->dev;
711 struct sk_buff *skb; 711 struct sk_buff *skb;
712 struct bootp_pkt *b; 712 struct bootp_pkt *b;
713 int hh_len = LL_RESERVED_SPACE(dev);
714 struct iphdr *h; 713 struct iphdr *h;
715 714
716 /* Allocate packet */ 715 /* Allocate packet */
717 skb = alloc_skb(sizeof(struct bootp_pkt) + hh_len + 15, GFP_KERNEL); 716 skb = alloc_skb(sizeof(struct bootp_pkt) + LL_ALLOCATED_SPACE(dev) + 15,
717 GFP_KERNEL);
718 if (!skb) 718 if (!skb)
719 return; 719 return;
720 skb_reserve(skb, hh_len); 720 skb_reserve(skb, LL_RESERVED_SPACE(dev));
721 b = (struct bootp_pkt *) skb_put(skb, sizeof(struct bootp_pkt)); 721 b = (struct bootp_pkt *) skb_put(skb, sizeof(struct bootp_pkt));
722 memset(b, 0, sizeof(struct bootp_pkt)); 722 memset(b, 0, sizeof(struct bootp_pkt));
723 723
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 149111f08e8..af5cb53da5c 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -278,9 +278,8 @@ static void ipip_tunnel_uninit(struct net_device *dev)
278 278
279static int ipip_err(struct sk_buff *skb, u32 info) 279static int ipip_err(struct sk_buff *skb, u32 info)
280{ 280{
281#ifndef I_WISH_WORLD_WERE_PERFECT
282 281
283/* It is not :-( All the routers (except for Linux) return only 282/* All the routers (except for Linux) return only
284 8 bytes of packet payload. It means, that precise relaying of 283 8 bytes of packet payload. It means, that precise relaying of
285 ICMP in the real Internet is absolutely infeasible. 284 ICMP in the real Internet is absolutely infeasible.
286 */ 285 */
@@ -337,133 +336,6 @@ static int ipip_err(struct sk_buff *skb, u32 info)
337out: 336out:
338 read_unlock(&ipip_lock); 337 read_unlock(&ipip_lock);
339 return err; 338 return err;
340#else
341 struct iphdr *iph = (struct iphdr*)dp;
342 int hlen = iph->ihl<<2;
343 struct iphdr *eiph;
344 const int type = icmp_hdr(skb)->type;
345 const int code = icmp_hdr(skb)->code;
346 int rel_type = 0;
347 int rel_code = 0;
348 __be32 rel_info = 0;
349 __u32 n = 0;
350 struct sk_buff *skb2;
351 struct flowi fl;
352 struct rtable *rt;
353
354 if (len < hlen + sizeof(struct iphdr))
355 return 0;
356 eiph = (struct iphdr*)(dp + hlen);
357
358 switch (type) {
359 default:
360 return 0;
361 case ICMP_PARAMETERPROB:
362 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
363 if (n < hlen)
364 return 0;
365
366 /* So... This guy found something strange INSIDE encapsulated
367 packet. Well, he is fool, but what can we do ?
368 */
369 rel_type = ICMP_PARAMETERPROB;
370 rel_info = htonl((n - hlen) << 24);
371 break;
372
373 case ICMP_DEST_UNREACH:
374 switch (code) {
375 case ICMP_SR_FAILED:
376 case ICMP_PORT_UNREACH:
377 /* Impossible event. */
378 return 0;
379 case ICMP_FRAG_NEEDED:
380 /* And it is the only really necessary thing :-) */
381 n = ntohs(icmp_hdr(skb)->un.frag.mtu);
382 if (n < hlen+68)
383 return 0;
384 n -= hlen;
385 /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
386 if (n > ntohs(eiph->tot_len))
387 return 0;
388 rel_info = htonl(n);
389 break;
390 default:
391 /* All others are translated to HOST_UNREACH.
392 rfc2003 contains "deep thoughts" about NET_UNREACH,
393 I believe, it is just ether pollution. --ANK
394 */
395 rel_type = ICMP_DEST_UNREACH;
396 rel_code = ICMP_HOST_UNREACH;
397 break;
398 }
399 break;
400 case ICMP_TIME_EXCEEDED:
401 if (code != ICMP_EXC_TTL)
402 return 0;
403 break;
404 }
405
406 /* Prepare fake skb to feed it to icmp_send */
407 skb2 = skb_clone(skb, GFP_ATOMIC);
408 if (skb2 == NULL)
409 return 0;
410 dst_release(skb2->dst);
411 skb2->dst = NULL;
412 skb_pull(skb2, skb->data - (u8*)eiph);
413 skb_reset_network_header(skb2);
414
415 /* Try to guess incoming interface */
416 memset(&fl, 0, sizeof(fl));
417 fl.fl4_daddr = eiph->saddr;
418 fl.fl4_tos = RT_TOS(eiph->tos);
419 fl.proto = IPPROTO_IPIP;
420 if (ip_route_output_key(dev_net(skb->dev), &rt, &key)) {
421 kfree_skb(skb2);
422 return 0;
423 }
424 skb2->dev = rt->u.dst.dev;
425
426 /* route "incoming" packet */
427 if (rt->rt_flags&RTCF_LOCAL) {
428 ip_rt_put(rt);
429 rt = NULL;
430 fl.fl4_daddr = eiph->daddr;
431 fl.fl4_src = eiph->saddr;
432 fl.fl4_tos = eiph->tos;
433 if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
434 rt->u.dst.dev->type != ARPHRD_TUNNEL) {
435 ip_rt_put(rt);
436 kfree_skb(skb2);
437 return 0;
438 }
439 } else {
440 ip_rt_put(rt);
441 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
442 skb2->dst->dev->type != ARPHRD_TUNNEL) {
443 kfree_skb(skb2);
444 return 0;
445 }
446 }
447
448 /* change mtu on this route */
449 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
450 if (n > dst_mtu(skb2->dst)) {
451 kfree_skb(skb2);
452 return 0;
453 }
454 skb2->dst->ops->update_pmtu(skb2->dst, n);
455 } else if (type == ICMP_TIME_EXCEEDED) {
456 struct ip_tunnel *t = netdev_priv(skb2->dev);
457 if (t->parms.iph.ttl) {
458 rel_type = ICMP_DEST_UNREACH;
459 rel_code = ICMP_HOST_UNREACH;
460 }
461 }
462
463 icmp_send(skb2, rel_type, rel_code, rel_info);
464 kfree_skb(skb2);
465 return 0;
466#endif
467} 339}
468 340
469static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph, 341static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 5daefad3d19..7750c97fde7 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -232,6 +232,11 @@ static unsigned char asn1_length_decode(struct asn1_ctx *ctx,
232 } 232 }
233 } 233 }
234 } 234 }
235
236 /* don't trust len bigger than ctx buffer */
237 if (*len > ctx->end - ctx->pointer)
238 return 0;
239
235 return 1; 240 return 1;
236} 241}
237 242
@@ -250,6 +255,10 @@ static unsigned char asn1_header_decode(struct asn1_ctx *ctx,
250 if (!asn1_length_decode(ctx, &def, &len)) 255 if (!asn1_length_decode(ctx, &def, &len))
251 return 0; 256 return 0;
252 257
258 /* primitive shall be definite, indefinite shall be constructed */
259 if (*con == ASN1_PRI && !def)
260 return 0;
261
253 if (def) 262 if (def)
254 *eoc = ctx->pointer + len; 263 *eoc = ctx->pointer + len;
255 else 264 else
@@ -434,6 +443,11 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
434 unsigned long *optr; 443 unsigned long *optr;
435 444
436 size = eoc - ctx->pointer + 1; 445 size = eoc - ctx->pointer + 1;
446
447 /* first subid actually encodes first two subids */
448 if (size < 2 || size > ULONG_MAX/sizeof(unsigned long))
449 return 0;
450
437 *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC); 451 *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
438 if (*oid == NULL) { 452 if (*oid == NULL) {
439 if (net_ratelimit()) 453 if (net_ratelimit())
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 11d7f753a82..e7e091d365f 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -322,7 +322,6 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
322 unsigned int flags) 322 unsigned int flags)
323{ 323{
324 struct inet_sock *inet = inet_sk(sk); 324 struct inet_sock *inet = inet_sk(sk);
325 int hh_len;
326 struct iphdr *iph; 325 struct iphdr *iph;
327 struct sk_buff *skb; 326 struct sk_buff *skb;
328 unsigned int iphlen; 327 unsigned int iphlen;
@@ -336,13 +335,12 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
336 if (flags&MSG_PROBE) 335 if (flags&MSG_PROBE)
337 goto out; 336 goto out;
338 337
339 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); 338 skb = sock_alloc_send_skb(sk,
340 339 length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15,
341 skb = sock_alloc_send_skb(sk, length+hh_len+15, 340 flags & MSG_DONTWAIT, &err);
342 flags&MSG_DONTWAIT, &err);
343 if (skb == NULL) 341 if (skb == NULL)
344 goto error; 342 goto error;
345 skb_reserve(skb, hh_len); 343 skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev));
346 344
347 skb->priority = sk->sk_priority; 345 skb->priority = sk->sk_priority;
348 skb->mark = sk->sk_mark; 346 skb->mark = sk->sk_mark;
@@ -610,6 +608,14 @@ static void raw_close(struct sock *sk, long timeout)
610 sk_common_release(sk); 608 sk_common_release(sk);
611} 609}
612 610
611static int raw_destroy(struct sock *sk)
612{
613 lock_sock(sk);
614 ip_flush_pending_frames(sk);
615 release_sock(sk);
616 return 0;
617}
618
613/* This gets rid of all the nasties in af_inet. -DaveM */ 619/* This gets rid of all the nasties in af_inet. -DaveM */
614static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) 620static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
615{ 621{
@@ -822,6 +828,7 @@ struct proto raw_prot = {
822 .name = "RAW", 828 .name = "RAW",
823 .owner = THIS_MODULE, 829 .owner = THIS_MODULE,
824 .close = raw_close, 830 .close = raw_close,
831 .destroy = raw_destroy,
825 .connect = ip4_datagram_connect, 832 .connect = ip4_datagram_connect,
826 .disconnect = udp_disconnect, 833 .disconnect = udp_disconnect,
827 .ioctl = raw_ioctl, 834 .ioctl = raw_ioctl,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 92f90ae46f4..96be336064f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -160,7 +160,7 @@ static struct dst_ops ipv4_dst_ops = {
160 .negative_advice = ipv4_negative_advice, 160 .negative_advice = ipv4_negative_advice,
161 .link_failure = ipv4_link_failure, 161 .link_failure = ipv4_link_failure,
162 .update_pmtu = ip_rt_update_pmtu, 162 .update_pmtu = ip_rt_update_pmtu,
163 .local_out = ip_local_out, 163 .local_out = __ip_local_out,
164 .entry_size = sizeof(struct rtable), 164 .entry_size = sizeof(struct rtable),
165 .entries = ATOMIC_INIT(0), 165 .entries = ATOMIC_INIT(0),
166}; 166};
@@ -1792,7 +1792,7 @@ static int __mkroute_input(struct sk_buff *skb,
1792 if (err) 1792 if (err)
1793 flags |= RTCF_DIRECTSRC; 1793 flags |= RTCF_DIRECTSRC;
1794 1794
1795 if (out_dev == in_dev && err && !(flags & RTCF_MASQ) && 1795 if (out_dev == in_dev && err &&
1796 (IN_DEV_SHARED_MEDIA(out_dev) || 1796 (IN_DEV_SHARED_MEDIA(out_dev) ||
1797 inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) 1797 inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
1798 flags |= RTCF_DOREDIRECT; 1798 flags |= RTCF_DOREDIRECT;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 73ba98921d6..d182a2a2629 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -285,7 +285,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
285 cookie_check_timestamp(&tcp_opt); 285 cookie_check_timestamp(&tcp_opt);
286 286
287 ret = NULL; 287 ret = NULL;
288 req = reqsk_alloc(&tcp_request_sock_ops); /* for safety */ 288 req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */
289 if (!req) 289 if (!req)
290 goto out; 290 goto out;
291 291
@@ -301,7 +301,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
301 ireq->rmt_port = th->source; 301 ireq->rmt_port = th->source;
302 ireq->loc_addr = ip_hdr(skb)->daddr; 302 ireq->loc_addr = ip_hdr(skb)->daddr;
303 ireq->rmt_addr = ip_hdr(skb)->saddr; 303 ireq->rmt_addr = ip_hdr(skb)->saddr;
304 ireq->opt = NULL;
305 ireq->snd_wscale = tcp_opt.snd_wscale; 304 ireq->snd_wscale = tcp_opt.snd_wscale;
306 ireq->rcv_wscale = tcp_opt.rcv_wscale; 305 ireq->rcv_wscale = tcp_opt.rcv_wscale;
307 ireq->sack_ok = tcp_opt.sack_ok; 306 ireq->sack_ok = tcp_opt.sack_ok;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f8865313862..fc54a48fde1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1227,7 +1227,14 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
1227 copied += used; 1227 copied += used;
1228 offset += used; 1228 offset += used;
1229 } 1229 }
1230 if (offset != skb->len) 1230 /*
1231 * If recv_actor drops the lock (e.g. TCP splice
1232 * receive) the skb pointer might be invalid when
1233 * getting here: tcp_collapse might have deleted it
1234 * while aggregating skbs from the socket queue.
1235 */
1236 skb = tcp_recv_skb(sk, seq-1, &offset);
1237 if (!skb || (offset+1 != skb->len))
1231 break; 1238 break;
1232 } 1239 }
1233 if (tcp_hdr(skb)->fin) { 1240 if (tcp_hdr(skb)->fin) {
@@ -2105,12 +2112,15 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2105 break; 2112 break;
2106 2113
2107 case TCP_DEFER_ACCEPT: 2114 case TCP_DEFER_ACCEPT:
2108 if (val < 0) { 2115 icsk->icsk_accept_queue.rskq_defer_accept = 0;
2109 err = -EINVAL; 2116 if (val > 0) {
2110 } else { 2117 /* Translate value in seconds to number of
2111 if (val > MAX_TCP_ACCEPT_DEFERRED) 2118 * retransmits */
2112 val = MAX_TCP_ACCEPT_DEFERRED; 2119 while (icsk->icsk_accept_queue.rskq_defer_accept < 32 &&
2113 icsk->icsk_accept_queue.rskq_defer_accept = val; 2120 val > ((TCP_TIMEOUT_INIT / HZ) <<
2121 icsk->icsk_accept_queue.rskq_defer_accept))
2122 icsk->icsk_accept_queue.rskq_defer_accept++;
2123 icsk->icsk_accept_queue.rskq_defer_accept++;
2114 } 2124 }
2115 break; 2125 break;
2116 2126
@@ -2292,7 +2302,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2292 val = (val ? : sysctl_tcp_fin_timeout) / HZ; 2302 val = (val ? : sysctl_tcp_fin_timeout) / HZ;
2293 break; 2303 break;
2294 case TCP_DEFER_ACCEPT: 2304 case TCP_DEFER_ACCEPT:
2295 val = icsk->icsk_accept_queue.rskq_defer_accept; 2305 val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 :
2306 ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1));
2296 break; 2307 break;
2297 case TCP_WINDOW_CLAMP: 2308 case TCP_WINDOW_CLAMP:
2298 val = tp->window_clamp; 2309 val = tp->window_clamp;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 26c936930e9..cad73b7dfef 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1392,9 +1392,9 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
1392 1392
1393 if (before(next_dup->start_seq, skip_to_seq)) { 1393 if (before(next_dup->start_seq, skip_to_seq)) {
1394 skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq, fack_count); 1394 skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq, fack_count);
1395 tcp_sacktag_walk(skb, sk, NULL, 1395 skb = tcp_sacktag_walk(skb, sk, NULL,
1396 next_dup->start_seq, next_dup->end_seq, 1396 next_dup->start_seq, next_dup->end_seq,
1397 1, fack_count, reord, flag); 1397 1, fack_count, reord, flag);
1398 } 1398 }
1399 1399
1400 return skb; 1400 return skb;
@@ -1842,9 +1842,16 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
1842 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; 1842 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1843 } 1843 }
1844 1844
1845 /* Don't lost mark skbs that were fwd transmitted after RTO */ 1845 /* Marking forward transmissions that were made after RTO lost
1846 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) && 1846 * can cause unnecessary retransmissions in some scenarios,
1847 !after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) { 1847 * SACK blocks will mitigate that in some but not in all cases.
1848 * We used to not mark them but it was causing break-ups with
1849 * receivers that do only in-order receival.
1850 *
1851 * TODO: we could detect presence of such receiver and select
1852 * different behavior per flow.
1853 */
1854 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
1848 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 1855 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1849 tp->lost_out += tcp_skb_pcount(skb); 1856 tp->lost_out += tcp_skb_pcount(skb);
1850 } 1857 }
@@ -1860,7 +1867,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
1860 tp->reordering = min_t(unsigned int, tp->reordering, 1867 tp->reordering = min_t(unsigned int, tp->reordering,
1861 sysctl_tcp_reordering); 1868 sysctl_tcp_reordering);
1862 tcp_set_ca_state(sk, TCP_CA_Loss); 1869 tcp_set_ca_state(sk, TCP_CA_Loss);
1863 tp->high_seq = tp->frto_highmark; 1870 tp->high_seq = tp->snd_nxt;
1864 TCP_ECN_queue_cwr(tp); 1871 TCP_ECN_queue_cwr(tp);
1865 1872
1866 tcp_clear_retrans_hints_partial(tp); 1873 tcp_clear_retrans_hints_partial(tp);
@@ -2476,28 +2483,34 @@ static inline void tcp_complete_cwr(struct sock *sk)
2476 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); 2483 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
2477} 2484}
2478 2485
2486static void tcp_try_keep_open(struct sock *sk)
2487{
2488 struct tcp_sock *tp = tcp_sk(sk);
2489 int state = TCP_CA_Open;
2490
2491 if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker)
2492 state = TCP_CA_Disorder;
2493
2494 if (inet_csk(sk)->icsk_ca_state != state) {
2495 tcp_set_ca_state(sk, state);
2496 tp->high_seq = tp->snd_nxt;
2497 }
2498}
2499
2479static void tcp_try_to_open(struct sock *sk, int flag) 2500static void tcp_try_to_open(struct sock *sk, int flag)
2480{ 2501{
2481 struct tcp_sock *tp = tcp_sk(sk); 2502 struct tcp_sock *tp = tcp_sk(sk);
2482 2503
2483 tcp_verify_left_out(tp); 2504 tcp_verify_left_out(tp);
2484 2505
2485 if (tp->retrans_out == 0) 2506 if (!tp->frto_counter && tp->retrans_out == 0)
2486 tp->retrans_stamp = 0; 2507 tp->retrans_stamp = 0;
2487 2508
2488 if (flag & FLAG_ECE) 2509 if (flag & FLAG_ECE)
2489 tcp_enter_cwr(sk, 1); 2510 tcp_enter_cwr(sk, 1);
2490 2511
2491 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { 2512 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2492 int state = TCP_CA_Open; 2513 tcp_try_keep_open(sk);
2493
2494 if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker)
2495 state = TCP_CA_Disorder;
2496
2497 if (inet_csk(sk)->icsk_ca_state != state) {
2498 tcp_set_ca_state(sk, state);
2499 tp->high_seq = tp->snd_nxt;
2500 }
2501 tcp_moderate_cwnd(tp); 2514 tcp_moderate_cwnd(tp);
2502 } else { 2515 } else {
2503 tcp_cwnd_down(sk, flag); 2516 tcp_cwnd_down(sk, flag);
@@ -3303,8 +3316,11 @@ no_queue:
3303 return 1; 3316 return 1;
3304 3317
3305old_ack: 3318old_ack:
3306 if (TCP_SKB_CB(skb)->sacked) 3319 if (TCP_SKB_CB(skb)->sacked) {
3307 tcp_sacktag_write_queue(sk, skb, prior_snd_una); 3320 tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3321 if (icsk->icsk_ca_state == TCP_CA_Open)
3322 tcp_try_keep_open(sk);
3323 }
3308 3324
3309uninteresting_ack: 3325uninteresting_ack:
3310 SOCK_DEBUG(sk, "Ack %u out of %u:%u\n", ack, tp->snd_una, tp->snd_nxt); 3326 SOCK_DEBUG(sk, "Ack %u out of %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
@@ -4525,49 +4541,6 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
4525 } 4541 }
4526} 4542}
4527 4543
4528static int tcp_defer_accept_check(struct sock *sk)
4529{
4530 struct tcp_sock *tp = tcp_sk(sk);
4531
4532 if (tp->defer_tcp_accept.request) {
4533 int queued_data = tp->rcv_nxt - tp->copied_seq;
4534 int hasfin = !skb_queue_empty(&sk->sk_receive_queue) ?
4535 tcp_hdr((struct sk_buff *)
4536 sk->sk_receive_queue.prev)->fin : 0;
4537
4538 if (queued_data && hasfin)
4539 queued_data--;
4540
4541 if (queued_data &&
4542 tp->defer_tcp_accept.listen_sk->sk_state == TCP_LISTEN) {
4543 if (sock_flag(sk, SOCK_KEEPOPEN)) {
4544 inet_csk_reset_keepalive_timer(sk,
4545 keepalive_time_when(tp));
4546 } else {
4547 inet_csk_delete_keepalive_timer(sk);
4548 }
4549
4550 inet_csk_reqsk_queue_add(
4551 tp->defer_tcp_accept.listen_sk,
4552 tp->defer_tcp_accept.request,
4553 sk);
4554
4555 tp->defer_tcp_accept.listen_sk->sk_data_ready(
4556 tp->defer_tcp_accept.listen_sk, 0);
4557
4558 sock_put(tp->defer_tcp_accept.listen_sk);
4559 sock_put(sk);
4560 tp->defer_tcp_accept.listen_sk = NULL;
4561 tp->defer_tcp_accept.request = NULL;
4562 } else if (hasfin ||
4563 tp->defer_tcp_accept.listen_sk->sk_state != TCP_LISTEN) {
4564 tcp_reset(sk);
4565 return -1;
4566 }
4567 }
4568 return 0;
4569}
4570
4571static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) 4544static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
4572{ 4545{
4573 struct tcp_sock *tp = tcp_sk(sk); 4546 struct tcp_sock *tp = tcp_sk(sk);
@@ -4928,8 +4901,6 @@ step5:
4928 4901
4929 tcp_data_snd_check(sk); 4902 tcp_data_snd_check(sk);
4930 tcp_ack_snd_check(sk); 4903 tcp_ack_snd_check(sk);
4931
4932 tcp_defer_accept_check(sk);
4933 return 0; 4904 return 0;
4934 4905
4935csum_error: 4906csum_error:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index cd601a866c2..97a230026e1 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1285,7 +1285,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1285 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) 1285 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1286 goto drop; 1286 goto drop;
1287 1287
1288 req = reqsk_alloc(&tcp_request_sock_ops); 1288 req = inet_reqsk_alloc(&tcp_request_sock_ops);
1289 if (!req) 1289 if (!req)
1290 goto drop; 1290 goto drop;
1291 1291
@@ -1918,14 +1918,6 @@ int tcp_v4_destroy_sock(struct sock *sk)
1918 sk->sk_sndmsg_page = NULL; 1918 sk->sk_sndmsg_page = NULL;
1919 } 1919 }
1920 1920
1921 if (tp->defer_tcp_accept.request) {
1922 reqsk_free(tp->defer_tcp_accept.request);
1923 sock_put(tp->defer_tcp_accept.listen_sk);
1924 sock_put(sk);
1925 tp->defer_tcp_accept.listen_sk = NULL;
1926 tp->defer_tcp_accept.request = NULL;
1927 }
1928
1929 atomic_dec(&tcp_sockets_allocated); 1921 atomic_dec(&tcp_sockets_allocated);
1930 1922
1931 return 0; 1923 return 0;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 019c8c16e5c..8245247a6ce 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -571,8 +571,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
571 does sequence test, SYN is truncated, and thus we consider 571 does sequence test, SYN is truncated, and thus we consider
572 it a bare ACK. 572 it a bare ACK.
573 573
574 Both ends (listening sockets) accept the new incoming 574 If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this
575 connection and try to talk to each other. 8-) 575 bare ACK. Otherwise, we create an established connection. Both
576 ends (listening sockets) accept the new incoming connection and try
577 to talk to each other. 8-)
576 578
577 Note: This case is both harmless, and rare. Possibility is about the 579 Note: This case is both harmless, and rare. Possibility is about the
578 same as us discovering intelligent life on another plant tomorrow. 580 same as us discovering intelligent life on another plant tomorrow.
@@ -640,6 +642,13 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
640 if (!(flg & TCP_FLAG_ACK)) 642 if (!(flg & TCP_FLAG_ACK))
641 return NULL; 643 return NULL;
642 644
645 /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
646 if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
647 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
648 inet_rsk(req)->acked = 1;
649 return NULL;
650 }
651
643 /* OK, ACK is valid, create big socket and 652 /* OK, ACK is valid, create big socket and
644 * feed this segment to it. It will repeat all 653 * feed this segment to it. It will repeat all
645 * the tests. THIS SEGMENT MUST MOVE SOCKET TO 654 * the tests. THIS SEGMENT MUST MOVE SOCKET TO
@@ -678,24 +687,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
678 inet_csk_reqsk_queue_unlink(sk, req, prev); 687 inet_csk_reqsk_queue_unlink(sk, req, prev);
679 inet_csk_reqsk_queue_removed(sk, req); 688 inet_csk_reqsk_queue_removed(sk, req);
680 689
681 if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && 690 inet_csk_reqsk_queue_add(sk, req, child);
682 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
683
684 /* the accept queue handling is done is est recv slow
685 * path so lets make sure to start there
686 */
687 tcp_sk(child)->pred_flags = 0;
688 sock_hold(sk);
689 sock_hold(child);
690 tcp_sk(child)->defer_tcp_accept.listen_sk = sk;
691 tcp_sk(child)->defer_tcp_accept.request = req;
692
693 inet_csk_reset_keepalive_timer(child,
694 inet_csk(sk)->icsk_accept_queue.rskq_defer_accept * HZ);
695 } else {
696 inet_csk_reqsk_queue_add(sk, req, child);
697 }
698
699 return child; 691 return child;
700 692
701 listen_overflow: 693 listen_overflow:
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index debf2358160..ad993ecb481 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1836,7 +1836,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1836{ 1836{
1837 struct tcp_sock *tp = tcp_sk(sk); 1837 struct tcp_sock *tp = tcp_sk(sk);
1838 struct inet_connection_sock *icsk = inet_csk(sk); 1838 struct inet_connection_sock *icsk = inet_csk(sk);
1839 unsigned int cur_mss = tcp_current_mss(sk, 0); 1839 unsigned int cur_mss;
1840 int err; 1840 int err;
1841 1841
1842 /* Inconslusive MTU probe */ 1842 /* Inconslusive MTU probe */
@@ -1858,6 +1858,11 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1858 return -ENOMEM; 1858 return -ENOMEM;
1859 } 1859 }
1860 1860
1861 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
1862 return -EHOSTUNREACH; /* Routing failure or similar. */
1863
1864 cur_mss = tcp_current_mss(sk, 0);
1865
1861 /* If receiver has shrunk his window, and skb is out of 1866 /* If receiver has shrunk his window, and skb is out of
1862 * new window, do not retransmit it. The exception is the 1867 * new window, do not retransmit it. The exception is the
1863 * case, when window is shrunk to zero. In this case 1868 * case, when window is shrunk to zero. In this case
@@ -1884,9 +1889,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1884 (sysctl_tcp_retrans_collapse != 0)) 1889 (sysctl_tcp_retrans_collapse != 0))
1885 tcp_retrans_try_collapse(sk, skb, cur_mss); 1890 tcp_retrans_try_collapse(sk, skb, cur_mss);
1886 1891
1887 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
1888 return -EHOSTUNREACH; /* Routing failure or similar. */
1889
1890 /* Some Solaris stacks overoptimize and ignore the FIN on a 1892 /* Some Solaris stacks overoptimize and ignore the FIN on a
1891 * retransmit when old data is attached. So strip it off 1893 * retransmit when old data is attached. So strip it off
1892 * since it is cheap to do so and saves bytes on the network. 1894 * since it is cheap to do so and saves bytes on the network.
@@ -2129,6 +2131,8 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2129 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2131 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2130 if (tcp_transmit_skb(sk, skb, 0, priority)) 2132 if (tcp_transmit_skb(sk, skb, 0, priority))
2131 NET_INC_STATS(LINUX_MIB_TCPABORTFAILED); 2133 NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
2134
2135 TCP_INC_STATS(TCP_MIB_OUTRSTS);
2132} 2136}
2133 2137
2134/* WARNING: This routine must only be called when we have already sent 2138/* WARNING: This routine must only be called when we have already sent
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 4de68cf5f2a..63ed9d6830e 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -489,11 +489,6 @@ static void tcp_keepalive_timer (unsigned long data)
489 goto death; 489 goto death;
490 } 490 }
491 491
492 if (tp->defer_tcp_accept.request && sk->sk_state == TCP_ESTABLISHED) {
493 tcp_send_active_reset(sk, GFP_ATOMIC);
494 goto death;
495 }
496
497 if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE) 492 if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
498 goto out; 493 goto out;
499 494
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index d3b709a6f26..cb1f0e83830 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -97,7 +97,7 @@ static int tunnel64_rcv(struct sk_buff *skb)
97{ 97{
98 struct xfrm_tunnel *handler; 98 struct xfrm_tunnel *handler;
99 99
100 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 100 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
101 goto drop; 101 goto drop;
102 102
103 for (handler = tunnel64_handlers; handler; handler = handler->next) 103 for (handler = tunnel64_handlers; handler; handler = handler->next)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index db1cb7c96d6..56fcda3694b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -420,7 +420,7 @@ void udp_err(struct sk_buff *skb, u32 info)
420/* 420/*
421 * Throw away all pending data and cancel the corking. Socket is locked. 421 * Throw away all pending data and cancel the corking. Socket is locked.
422 */ 422 */
423static void udp_flush_pending_frames(struct sock *sk) 423void udp_flush_pending_frames(struct sock *sk)
424{ 424{
425 struct udp_sock *up = udp_sk(sk); 425 struct udp_sock *up = udp_sk(sk);
426 426
@@ -430,6 +430,7 @@ static void udp_flush_pending_frames(struct sock *sk)
430 ip_flush_pending_frames(sk); 430 ip_flush_pending_frames(sk);
431 } 431 }
432} 432}
433EXPORT_SYMBOL(udp_flush_pending_frames);
433 434
434/** 435/**
435 * udp4_hwcsum_outgoing - handle outgoing HW checksumming 436 * udp4_hwcsum_outgoing - handle outgoing HW checksumming