aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/arp.c5
-rw-r--r--net/ipv4/devinet.c9
-rw-r--r--net/ipv4/fib_frontend.c1
-rw-r--r--net/ipv4/fib_semantics.c5
-rw-r--r--net/ipv4/inet_connection_sock.c11
-rw-r--r--net/ipv4/inet_fragment.c16
-rw-r--r--net/ipv4/inet_lro.c3
-rw-r--r--net/ipv4/ip_fragment.c2
-rw-r--r--net/ipv4/ip_gre.c146
-rw-r--r--net/ipv4/ipip.c130
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c3
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c14
-rw-r--r--net/ipv4/raw.c11
-rw-r--r--net/ipv4/route.c4
-rw-r--r--net/ipv4/syncookies.c3
-rw-r--r--net/ipv4/tcp.c36
-rw-r--r--net/ipv4/tcp_input.c80
-rw-r--r--net/ipv4/tcp_ipv4.c20
-rw-r--r--net/ipv4/tcp_minisocks.c32
-rw-r--r--net/ipv4/tcp_output.c12
-rw-r--r--net/ipv4/tcp_timer.c5
-rw-r--r--net/ipv4/tunnel4.c2
-rw-r--r--net/ipv4/udp.c3
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c2
24 files changed, 133 insertions, 422 deletions
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 418862f1bf22..9b539fa9fe18 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1288,7 +1288,6 @@ static void arp_format_neigh_entry(struct seq_file *seq,
1288 struct neighbour *n) 1288 struct neighbour *n)
1289{ 1289{
1290 char hbuffer[HBUFFERLEN]; 1290 char hbuffer[HBUFFERLEN];
1291 const char hexbuf[] = "0123456789ABCDEF";
1292 int k, j; 1291 int k, j;
1293 char tbuf[16]; 1292 char tbuf[16];
1294 struct net_device *dev = n->dev; 1293 struct net_device *dev = n->dev;
@@ -1302,8 +1301,8 @@ static void arp_format_neigh_entry(struct seq_file *seq,
1302 else { 1301 else {
1303#endif 1302#endif
1304 for (k = 0, j = 0; k < HBUFFERLEN - 3 && j < dev->addr_len; j++) { 1303 for (k = 0, j = 0; k < HBUFFERLEN - 3 && j < dev->addr_len; j++) {
1305 hbuffer[k++] = hexbuf[(n->ha[j] >> 4) & 15]; 1304 hbuffer[k++] = hex_asc_hi(n->ha[j]);
1306 hbuffer[k++] = hexbuf[n->ha[j] & 15]; 1305 hbuffer[k++] = hex_asc_lo(n->ha[j]);
1307 hbuffer[k++] = ':'; 1306 hbuffer[k++] = ':';
1308 } 1307 }
1309 hbuffer[--k] = 0; 1308 hbuffer[--k] = 0;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 6848e4760f34..79a7ef6209ff 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -90,7 +90,6 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
90 [IFA_LOCAL] = { .type = NLA_U32 }, 90 [IFA_LOCAL] = { .type = NLA_U32 },
91 [IFA_ADDRESS] = { .type = NLA_U32 }, 91 [IFA_ADDRESS] = { .type = NLA_U32 },
92 [IFA_BROADCAST] = { .type = NLA_U32 }, 92 [IFA_BROADCAST] = { .type = NLA_U32 },
93 [IFA_ANYCAST] = { .type = NLA_U32 },
94 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, 93 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
95}; 94};
96 95
@@ -536,9 +535,6 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
536 if (tb[IFA_BROADCAST]) 535 if (tb[IFA_BROADCAST])
537 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]); 536 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
538 537
539 if (tb[IFA_ANYCAST])
540 ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
541
542 if (tb[IFA_LABEL]) 538 if (tb[IFA_LABEL])
543 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ); 539 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
544 else 540 else
@@ -745,7 +741,6 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
745 break; 741 break;
746 inet_del_ifa(in_dev, ifap, 0); 742 inet_del_ifa(in_dev, ifap, 0);
747 ifa->ifa_broadcast = 0; 743 ifa->ifa_broadcast = 0;
748 ifa->ifa_anycast = 0;
749 ifa->ifa_scope = 0; 744 ifa->ifa_scope = 0;
750 } 745 }
751 746
@@ -1113,7 +1108,6 @@ static inline size_t inet_nlmsg_size(void)
1113 + nla_total_size(4) /* IFA_ADDRESS */ 1108 + nla_total_size(4) /* IFA_ADDRESS */
1114 + nla_total_size(4) /* IFA_LOCAL */ 1109 + nla_total_size(4) /* IFA_LOCAL */
1115 + nla_total_size(4) /* IFA_BROADCAST */ 1110 + nla_total_size(4) /* IFA_BROADCAST */
1116 + nla_total_size(4) /* IFA_ANYCAST */
1117 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */ 1111 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1118} 1112}
1119 1113
@@ -1143,9 +1137,6 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1143 if (ifa->ifa_broadcast) 1137 if (ifa->ifa_broadcast)
1144 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast); 1138 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1145 1139
1146 if (ifa->ifa_anycast)
1147 NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1148
1149 if (ifa->ifa_label[0]) 1140 if (ifa->ifa_label[0])
1150 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label); 1141 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1151 1142
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 0f1557a4ac7a..0b2ac6a3d903 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -506,7 +506,6 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
506 [RTA_PREFSRC] = { .type = NLA_U32 }, 506 [RTA_PREFSRC] = { .type = NLA_U32 },
507 [RTA_METRICS] = { .type = NLA_NESTED }, 507 [RTA_METRICS] = { .type = NLA_NESTED },
508 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, 508 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
509 [RTA_PROTOINFO] = { .type = NLA_U32 },
510 [RTA_FLOW] = { .type = NLA_U32 }, 509 [RTA_FLOW] = { .type = NLA_U32 },
511}; 510};
512 511
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 3b83c34019fc..0d4d72827e4b 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -960,7 +960,10 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
960 rtm->rtm_dst_len = dst_len; 960 rtm->rtm_dst_len = dst_len;
961 rtm->rtm_src_len = 0; 961 rtm->rtm_src_len = 0;
962 rtm->rtm_tos = tos; 962 rtm->rtm_tos = tos;
963 rtm->rtm_table = tb_id; 963 if (tb_id < 256)
964 rtm->rtm_table = tb_id;
965 else
966 rtm->rtm_table = RT_TABLE_COMPAT;
964 NLA_PUT_U32(skb, RTA_TABLE, tb_id); 967 NLA_PUT_U32(skb, RTA_TABLE, tb_id);
965 rtm->rtm_type = type; 968 rtm->rtm_type = type;
966 rtm->rtm_flags = fi->fib_flags; 969 rtm->rtm_flags = fi->fib_flags;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 828ea211ff21..ec834480abe7 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -419,7 +419,8 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
419 struct inet_connection_sock *icsk = inet_csk(parent); 419 struct inet_connection_sock *icsk = inet_csk(parent);
420 struct request_sock_queue *queue = &icsk->icsk_accept_queue; 420 struct request_sock_queue *queue = &icsk->icsk_accept_queue;
421 struct listen_sock *lopt = queue->listen_opt; 421 struct listen_sock *lopt = queue->listen_opt;
422 int thresh = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; 422 int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
423 int thresh = max_retries;
423 unsigned long now = jiffies; 424 unsigned long now = jiffies;
424 struct request_sock **reqp, *req; 425 struct request_sock **reqp, *req;
425 int i, budget; 426 int i, budget;
@@ -455,6 +456,9 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
455 } 456 }
456 } 457 }
457 458
459 if (queue->rskq_defer_accept)
460 max_retries = queue->rskq_defer_accept;
461
458 budget = 2 * (lopt->nr_table_entries / (timeout / interval)); 462 budget = 2 * (lopt->nr_table_entries / (timeout / interval));
459 i = lopt->clock_hand; 463 i = lopt->clock_hand;
460 464
@@ -462,8 +466,9 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
462 reqp=&lopt->syn_table[i]; 466 reqp=&lopt->syn_table[i];
463 while ((req = *reqp) != NULL) { 467 while ((req = *reqp) != NULL) {
464 if (time_after_eq(now, req->expires)) { 468 if (time_after_eq(now, req->expires)) {
465 if (req->retrans < thresh && 469 if ((req->retrans < thresh ||
466 !req->rsk_ops->rtx_syn_ack(parent, req)) { 470 (inet_rsk(req)->acked && req->retrans < max_retries))
471 && !req->rsk_ops->rtx_syn_ack(parent, req)) {
467 unsigned long timeo; 472 unsigned long timeo;
468 473
469 if (req->retrans++ == 0) 474 if (req->retrans++ == 0)
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 4ed429bd5951..0546a0bc97ea 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -192,14 +192,21 @@ EXPORT_SYMBOL(inet_frag_evictor);
192 192
193static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, 193static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
194 struct inet_frag_queue *qp_in, struct inet_frags *f, 194 struct inet_frag_queue *qp_in, struct inet_frags *f,
195 unsigned int hash, void *arg) 195 void *arg)
196{ 196{
197 struct inet_frag_queue *qp; 197 struct inet_frag_queue *qp;
198#ifdef CONFIG_SMP 198#ifdef CONFIG_SMP
199 struct hlist_node *n; 199 struct hlist_node *n;
200#endif 200#endif
201 unsigned int hash;
201 202
202 write_lock(&f->lock); 203 write_lock(&f->lock);
204 /*
205 * While we stayed w/o the lock other CPU could update
206 * the rnd seed, so we need to re-calculate the hash
207 * chain. Fortunatelly the qp_in can be used to get one.
208 */
209 hash = f->hashfn(qp_in);
203#ifdef CONFIG_SMP 210#ifdef CONFIG_SMP
204 /* With SMP race we have to recheck hash table, because 211 /* With SMP race we have to recheck hash table, because
205 * such entry could be created on other cpu, while we 212 * such entry could be created on other cpu, while we
@@ -247,7 +254,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
247} 254}
248 255
249static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, 256static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
250 struct inet_frags *f, void *arg, unsigned int hash) 257 struct inet_frags *f, void *arg)
251{ 258{
252 struct inet_frag_queue *q; 259 struct inet_frag_queue *q;
253 260
@@ -255,7 +262,7 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
255 if (q == NULL) 262 if (q == NULL)
256 return NULL; 263 return NULL;
257 264
258 return inet_frag_intern(nf, q, f, hash, arg); 265 return inet_frag_intern(nf, q, f, arg);
259} 266}
260 267
261struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, 268struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
@@ -264,7 +271,6 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
264 struct inet_frag_queue *q; 271 struct inet_frag_queue *q;
265 struct hlist_node *n; 272 struct hlist_node *n;
266 273
267 read_lock(&f->lock);
268 hlist_for_each_entry(q, n, &f->hash[hash], list) { 274 hlist_for_each_entry(q, n, &f->hash[hash], list) {
269 if (q->net == nf && f->match(q, key)) { 275 if (q->net == nf && f->match(q, key)) {
270 atomic_inc(&q->refcnt); 276 atomic_inc(&q->refcnt);
@@ -274,6 +280,6 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
274 } 280 }
275 read_unlock(&f->lock); 281 read_unlock(&f->lock);
276 282
277 return inet_frag_create(nf, f, key, hash); 283 return inet_frag_create(nf, f, key);
278} 284}
279EXPORT_SYMBOL(inet_frag_find); 285EXPORT_SYMBOL(inet_frag_find);
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
index 4a4d49fca1f2..cfd034a2b96e 100644
--- a/net/ipv4/inet_lro.c
+++ b/net/ipv4/inet_lro.c
@@ -383,8 +383,7 @@ static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
383out2: /* send aggregated SKBs to stack */ 383out2: /* send aggregated SKBs to stack */
384 lro_flush(lro_mgr, lro_desc); 384 lro_flush(lro_mgr, lro_desc);
385 385
386out: /* Original SKB has to be posted to stack */ 386out:
387 skb->ip_summed = lro_mgr->ip_summed;
388 return 1; 387 return 1;
389} 388}
390 389
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index cd6ce6ac6358..37221f659159 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -229,6 +229,8 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
229 229
230 arg.iph = iph; 230 arg.iph = iph;
231 arg.user = user; 231 arg.user = user;
232
233 read_lock(&ip4_frags.lock);
232 hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); 234 hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
233 235
234 q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); 236 q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 2ada033406de..4342cba4ff82 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -313,9 +313,8 @@ static void ipgre_tunnel_uninit(struct net_device *dev)
313 313
314static void ipgre_err(struct sk_buff *skb, u32 info) 314static void ipgre_err(struct sk_buff *skb, u32 info)
315{ 315{
316#ifndef I_WISH_WORLD_WERE_PERFECT
317 316
318/* It is not :-( All the routers (except for Linux) return only 317/* All the routers (except for Linux) return only
319 8 bytes of packet payload. It means, that precise relaying of 318 8 bytes of packet payload. It means, that precise relaying of
320 ICMP in the real Internet is absolutely infeasible. 319 ICMP in the real Internet is absolutely infeasible.
321 320
@@ -398,149 +397,6 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
398out: 397out:
399 read_unlock(&ipgre_lock); 398 read_unlock(&ipgre_lock);
400 return; 399 return;
401#else
402 struct iphdr *iph = (struct iphdr*)dp;
403 struct iphdr *eiph;
404 __be16 *p = (__be16*)(dp+(iph->ihl<<2));
405 const int type = icmp_hdr(skb)->type;
406 const int code = icmp_hdr(skb)->code;
407 int rel_type = 0;
408 int rel_code = 0;
409 __be32 rel_info = 0;
410 __u32 n = 0;
411 __be16 flags;
412 int grehlen = (iph->ihl<<2) + 4;
413 struct sk_buff *skb2;
414 struct flowi fl;
415 struct rtable *rt;
416
417 if (p[1] != htons(ETH_P_IP))
418 return;
419
420 flags = p[0];
421 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
422 if (flags&(GRE_VERSION|GRE_ROUTING))
423 return;
424 if (flags&GRE_CSUM)
425 grehlen += 4;
426 if (flags&GRE_KEY)
427 grehlen += 4;
428 if (flags&GRE_SEQ)
429 grehlen += 4;
430 }
431 if (len < grehlen + sizeof(struct iphdr))
432 return;
433 eiph = (struct iphdr*)(dp + grehlen);
434
435 switch (type) {
436 default:
437 return;
438 case ICMP_PARAMETERPROB:
439 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
440 if (n < (iph->ihl<<2))
441 return;
442
443 /* So... This guy found something strange INSIDE encapsulated
444 packet. Well, he is fool, but what can we do ?
445 */
446 rel_type = ICMP_PARAMETERPROB;
447 n -= grehlen;
448 rel_info = htonl(n << 24);
449 break;
450
451 case ICMP_DEST_UNREACH:
452 switch (code) {
453 case ICMP_SR_FAILED:
454 case ICMP_PORT_UNREACH:
455 /* Impossible event. */
456 return;
457 case ICMP_FRAG_NEEDED:
458 /* And it is the only really necessary thing :-) */
459 n = ntohs(icmp_hdr(skb)->un.frag.mtu);
460 if (n < grehlen+68)
461 return;
462 n -= grehlen;
463 /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
464 if (n > ntohs(eiph->tot_len))
465 return;
466 rel_info = htonl(n);
467 break;
468 default:
469 /* All others are translated to HOST_UNREACH.
470 rfc2003 contains "deep thoughts" about NET_UNREACH,
471 I believe, it is just ether pollution. --ANK
472 */
473 rel_type = ICMP_DEST_UNREACH;
474 rel_code = ICMP_HOST_UNREACH;
475 break;
476 }
477 break;
478 case ICMP_TIME_EXCEEDED:
479 if (code != ICMP_EXC_TTL)
480 return;
481 break;
482 }
483
484 /* Prepare fake skb to feed it to icmp_send */
485 skb2 = skb_clone(skb, GFP_ATOMIC);
486 if (skb2 == NULL)
487 return;
488 dst_release(skb2->dst);
489 skb2->dst = NULL;
490 skb_pull(skb2, skb->data - (u8*)eiph);
491 skb_reset_network_header(skb2);
492
493 /* Try to guess incoming interface */
494 memset(&fl, 0, sizeof(fl));
495 fl.fl4_dst = eiph->saddr;
496 fl.fl4_tos = RT_TOS(eiph->tos);
497 fl.proto = IPPROTO_GRE;
498 if (ip_route_output_key(dev_net(skb->dev), &rt, &fl)) {
499 kfree_skb(skb2);
500 return;
501 }
502 skb2->dev = rt->u.dst.dev;
503
504 /* route "incoming" packet */
505 if (rt->rt_flags&RTCF_LOCAL) {
506 ip_rt_put(rt);
507 rt = NULL;
508 fl.fl4_dst = eiph->daddr;
509 fl.fl4_src = eiph->saddr;
510 fl.fl4_tos = eiph->tos;
511 if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
512 rt->u.dst.dev->type != ARPHRD_IPGRE) {
513 ip_rt_put(rt);
514 kfree_skb(skb2);
515 return;
516 }
517 } else {
518 ip_rt_put(rt);
519 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
520 skb2->dst->dev->type != ARPHRD_IPGRE) {
521 kfree_skb(skb2);
522 return;
523 }
524 }
525
526 /* change mtu on this route */
527 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
528 if (n > dst_mtu(skb2->dst)) {
529 kfree_skb(skb2);
530 return;
531 }
532 skb2->dst->ops->update_pmtu(skb2->dst, n);
533 } else if (type == ICMP_TIME_EXCEEDED) {
534 struct ip_tunnel *t = netdev_priv(skb2->dev);
535 if (t->parms.iph.ttl) {
536 rel_type = ICMP_DEST_UNREACH;
537 rel_code = ICMP_HOST_UNREACH;
538 }
539 }
540
541 icmp_send(skb2, rel_type, rel_code, rel_info);
542 kfree_skb(skb2);
543#endif
544} 400}
545 401
546static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) 402static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 149111f08e8d..af5cb53da5cc 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -278,9 +278,8 @@ static void ipip_tunnel_uninit(struct net_device *dev)
278 278
279static int ipip_err(struct sk_buff *skb, u32 info) 279static int ipip_err(struct sk_buff *skb, u32 info)
280{ 280{
281#ifndef I_WISH_WORLD_WERE_PERFECT
282 281
283/* It is not :-( All the routers (except for Linux) return only 282/* All the routers (except for Linux) return only
284 8 bytes of packet payload. It means, that precise relaying of 283 8 bytes of packet payload. It means, that precise relaying of
285 ICMP in the real Internet is absolutely infeasible. 284 ICMP in the real Internet is absolutely infeasible.
286 */ 285 */
@@ -337,133 +336,6 @@ static int ipip_err(struct sk_buff *skb, u32 info)
337out: 336out:
338 read_unlock(&ipip_lock); 337 read_unlock(&ipip_lock);
339 return err; 338 return err;
340#else
341 struct iphdr *iph = (struct iphdr*)dp;
342 int hlen = iph->ihl<<2;
343 struct iphdr *eiph;
344 const int type = icmp_hdr(skb)->type;
345 const int code = icmp_hdr(skb)->code;
346 int rel_type = 0;
347 int rel_code = 0;
348 __be32 rel_info = 0;
349 __u32 n = 0;
350 struct sk_buff *skb2;
351 struct flowi fl;
352 struct rtable *rt;
353
354 if (len < hlen + sizeof(struct iphdr))
355 return 0;
356 eiph = (struct iphdr*)(dp + hlen);
357
358 switch (type) {
359 default:
360 return 0;
361 case ICMP_PARAMETERPROB:
362 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
363 if (n < hlen)
364 return 0;
365
366 /* So... This guy found something strange INSIDE encapsulated
367 packet. Well, he is fool, but what can we do ?
368 */
369 rel_type = ICMP_PARAMETERPROB;
370 rel_info = htonl((n - hlen) << 24);
371 break;
372
373 case ICMP_DEST_UNREACH:
374 switch (code) {
375 case ICMP_SR_FAILED:
376 case ICMP_PORT_UNREACH:
377 /* Impossible event. */
378 return 0;
379 case ICMP_FRAG_NEEDED:
380 /* And it is the only really necessary thing :-) */
381 n = ntohs(icmp_hdr(skb)->un.frag.mtu);
382 if (n < hlen+68)
383 return 0;
384 n -= hlen;
385 /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
386 if (n > ntohs(eiph->tot_len))
387 return 0;
388 rel_info = htonl(n);
389 break;
390 default:
391 /* All others are translated to HOST_UNREACH.
392 rfc2003 contains "deep thoughts" about NET_UNREACH,
393 I believe, it is just ether pollution. --ANK
394 */
395 rel_type = ICMP_DEST_UNREACH;
396 rel_code = ICMP_HOST_UNREACH;
397 break;
398 }
399 break;
400 case ICMP_TIME_EXCEEDED:
401 if (code != ICMP_EXC_TTL)
402 return 0;
403 break;
404 }
405
406 /* Prepare fake skb to feed it to icmp_send */
407 skb2 = skb_clone(skb, GFP_ATOMIC);
408 if (skb2 == NULL)
409 return 0;
410 dst_release(skb2->dst);
411 skb2->dst = NULL;
412 skb_pull(skb2, skb->data - (u8*)eiph);
413 skb_reset_network_header(skb2);
414
415 /* Try to guess incoming interface */
416 memset(&fl, 0, sizeof(fl));
417 fl.fl4_daddr = eiph->saddr;
418 fl.fl4_tos = RT_TOS(eiph->tos);
419 fl.proto = IPPROTO_IPIP;
420 if (ip_route_output_key(dev_net(skb->dev), &rt, &key)) {
421 kfree_skb(skb2);
422 return 0;
423 }
424 skb2->dev = rt->u.dst.dev;
425
426 /* route "incoming" packet */
427 if (rt->rt_flags&RTCF_LOCAL) {
428 ip_rt_put(rt);
429 rt = NULL;
430 fl.fl4_daddr = eiph->daddr;
431 fl.fl4_src = eiph->saddr;
432 fl.fl4_tos = eiph->tos;
433 if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
434 rt->u.dst.dev->type != ARPHRD_TUNNEL) {
435 ip_rt_put(rt);
436 kfree_skb(skb2);
437 return 0;
438 }
439 } else {
440 ip_rt_put(rt);
441 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
442 skb2->dst->dev->type != ARPHRD_TUNNEL) {
443 kfree_skb(skb2);
444 return 0;
445 }
446 }
447
448 /* change mtu on this route */
449 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
450 if (n > dst_mtu(skb2->dst)) {
451 kfree_skb(skb2);
452 return 0;
453 }
454 skb2->dst->ops->update_pmtu(skb2->dst, n);
455 } else if (type == ICMP_TIME_EXCEEDED) {
456 struct ip_tunnel *t = netdev_priv(skb2->dev);
457 if (t->parms.iph.ttl) {
458 rel_type = ICMP_DEST_UNREACH;
459 rel_code = ICMP_HOST_UNREACH;
460 }
461 }
462
463 icmp_send(skb2, rel_type, rel_code, rel_info);
464 kfree_skb(skb2);
465 return 0;
466#endif
467} 339}
468 340
469static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph, 341static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 04578593e100..d2a887fc8d9b 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -556,7 +556,6 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
556 556
557 spin_lock_bh(&nf_nat_lock); 557 spin_lock_bh(&nf_nat_lock);
558 hlist_del_rcu(&nat->bysource); 558 hlist_del_rcu(&nat->bysource);
559 nat->ct = NULL;
560 spin_unlock_bh(&nf_nat_lock); 559 spin_unlock_bh(&nf_nat_lock);
561} 560}
562 561
@@ -570,8 +569,8 @@ static void nf_nat_move_storage(void *new, void *old)
570 return; 569 return;
571 570
572 spin_lock_bh(&nf_nat_lock); 571 spin_lock_bh(&nf_nat_lock);
573 hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource);
574 new_nat->ct = ct; 572 new_nat->ct = ct;
573 hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource);
575 spin_unlock_bh(&nf_nat_lock); 574 spin_unlock_bh(&nf_nat_lock);
576} 575}
577 576
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 5daefad3d193..7750c97fde7b 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -232,6 +232,11 @@ static unsigned char asn1_length_decode(struct asn1_ctx *ctx,
232 } 232 }
233 } 233 }
234 } 234 }
235
236 /* don't trust len bigger than ctx buffer */
237 if (*len > ctx->end - ctx->pointer)
238 return 0;
239
235 return 1; 240 return 1;
236} 241}
237 242
@@ -250,6 +255,10 @@ static unsigned char asn1_header_decode(struct asn1_ctx *ctx,
250 if (!asn1_length_decode(ctx, &def, &len)) 255 if (!asn1_length_decode(ctx, &def, &len))
251 return 0; 256 return 0;
252 257
258 /* primitive shall be definite, indefinite shall be constructed */
259 if (*con == ASN1_PRI && !def)
260 return 0;
261
253 if (def) 262 if (def)
254 *eoc = ctx->pointer + len; 263 *eoc = ctx->pointer + len;
255 else 264 else
@@ -434,6 +443,11 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
434 unsigned long *optr; 443 unsigned long *optr;
435 444
436 size = eoc - ctx->pointer + 1; 445 size = eoc - ctx->pointer + 1;
446
447 /* first subid actually encodes first two subids */
448 if (size < 2 || size > ULONG_MAX/sizeof(unsigned long))
449 return 0;
450
437 *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC); 451 *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
438 if (*oid == NULL) { 452 if (*oid == NULL) {
439 if (net_ratelimit()) 453 if (net_ratelimit())
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index fead049daf43..37a1ecd9d600 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -608,6 +608,14 @@ static void raw_close(struct sock *sk, long timeout)
608 sk_common_release(sk); 608 sk_common_release(sk);
609} 609}
610 610
611static int raw_destroy(struct sock *sk)
612{
613 lock_sock(sk);
614 ip_flush_pending_frames(sk);
615 release_sock(sk);
616 return 0;
617}
618
611/* This gets rid of all the nasties in af_inet. -DaveM */ 619/* This gets rid of all the nasties in af_inet. -DaveM */
612static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) 620static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
613{ 621{
@@ -820,6 +828,7 @@ struct proto raw_prot = {
820 .name = "RAW", 828 .name = "RAW",
821 .owner = THIS_MODULE, 829 .owner = THIS_MODULE,
822 .close = raw_close, 830 .close = raw_close,
831 .destroy = raw_destroy,
823 .connect = ip4_datagram_connect, 832 .connect = ip4_datagram_connect,
824 .disconnect = udp_disconnect, 833 .disconnect = udp_disconnect,
825 .ioctl = raw_ioctl, 834 .ioctl = raw_ioctl,
@@ -925,7 +934,7 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
925 srcp = inet->num; 934 srcp = inet->num;
926 935
927 seq_printf(seq, "%4d: %08X:%04X %08X:%04X" 936 seq_printf(seq, "%4d: %08X:%04X %08X:%04X"
928 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d", 937 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n",
929 i, src, srcp, dest, destp, sp->sk_state, 938 i, src, srcp, dest, destp, sp->sk_state,
930 atomic_read(&sp->sk_wmem_alloc), 939 atomic_read(&sp->sk_wmem_alloc),
931 atomic_read(&sp->sk_rmem_alloc), 940 atomic_read(&sp->sk_rmem_alloc),
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 92f90ae46f4a..96be336064fb 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -160,7 +160,7 @@ static struct dst_ops ipv4_dst_ops = {
160 .negative_advice = ipv4_negative_advice, 160 .negative_advice = ipv4_negative_advice,
161 .link_failure = ipv4_link_failure, 161 .link_failure = ipv4_link_failure,
162 .update_pmtu = ip_rt_update_pmtu, 162 .update_pmtu = ip_rt_update_pmtu,
163 .local_out = ip_local_out, 163 .local_out = __ip_local_out,
164 .entry_size = sizeof(struct rtable), 164 .entry_size = sizeof(struct rtable),
165 .entries = ATOMIC_INIT(0), 165 .entries = ATOMIC_INIT(0),
166}; 166};
@@ -1792,7 +1792,7 @@ static int __mkroute_input(struct sk_buff *skb,
1792 if (err) 1792 if (err)
1793 flags |= RTCF_DIRECTSRC; 1793 flags |= RTCF_DIRECTSRC;
1794 1794
1795 if (out_dev == in_dev && err && !(flags & RTCF_MASQ) && 1795 if (out_dev == in_dev && err &&
1796 (IN_DEV_SHARED_MEDIA(out_dev) || 1796 (IN_DEV_SHARED_MEDIA(out_dev) ||
1797 inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) 1797 inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
1798 flags |= RTCF_DOREDIRECT; 1798 flags |= RTCF_DOREDIRECT;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 73ba98921d64..d182a2a26291 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -285,7 +285,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
285 cookie_check_timestamp(&tcp_opt); 285 cookie_check_timestamp(&tcp_opt);
286 286
287 ret = NULL; 287 ret = NULL;
288 req = reqsk_alloc(&tcp_request_sock_ops); /* for safety */ 288 req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */
289 if (!req) 289 if (!req)
290 goto out; 290 goto out;
291 291
@@ -301,7 +301,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
301 ireq->rmt_port = th->source; 301 ireq->rmt_port = th->source;
302 ireq->loc_addr = ip_hdr(skb)->daddr; 302 ireq->loc_addr = ip_hdr(skb)->daddr;
303 ireq->rmt_addr = ip_hdr(skb)->saddr; 303 ireq->rmt_addr = ip_hdr(skb)->saddr;
304 ireq->opt = NULL;
305 ireq->snd_wscale = tcp_opt.snd_wscale; 304 ireq->snd_wscale = tcp_opt.snd_wscale;
306 ireq->rcv_wscale = tcp_opt.rcv_wscale; 305 ireq->rcv_wscale = tcp_opt.rcv_wscale;
307 ireq->sack_ok = tcp_opt.sack_ok; 306 ireq->sack_ok = tcp_opt.sack_ok;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f88653138621..850825dc86e6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -260,6 +260,8 @@
260#include <linux/socket.h> 260#include <linux/socket.h>
261#include <linux/random.h> 261#include <linux/random.h>
262#include <linux/bootmem.h> 262#include <linux/bootmem.h>
263#include <linux/highmem.h>
264#include <linux/swap.h>
263#include <linux/cache.h> 265#include <linux/cache.h>
264#include <linux/err.h> 266#include <linux/err.h>
265#include <linux/crypto.h> 267#include <linux/crypto.h>
@@ -1227,7 +1229,14 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
1227 copied += used; 1229 copied += used;
1228 offset += used; 1230 offset += used;
1229 } 1231 }
1230 if (offset != skb->len) 1232 /*
1233 * If recv_actor drops the lock (e.g. TCP splice
1234 * receive) the skb pointer might be invalid when
1235 * getting here: tcp_collapse might have deleted it
1236 * while aggregating skbs from the socket queue.
1237 */
1238 skb = tcp_recv_skb(sk, seq-1, &offset);
1239 if (!skb || (offset+1 != skb->len))
1231 break; 1240 break;
1232 } 1241 }
1233 if (tcp_hdr(skb)->fin) { 1242 if (tcp_hdr(skb)->fin) {
@@ -2105,12 +2114,15 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2105 break; 2114 break;
2106 2115
2107 case TCP_DEFER_ACCEPT: 2116 case TCP_DEFER_ACCEPT:
2108 if (val < 0) { 2117 icsk->icsk_accept_queue.rskq_defer_accept = 0;
2109 err = -EINVAL; 2118 if (val > 0) {
2110 } else { 2119 /* Translate value in seconds to number of
2111 if (val > MAX_TCP_ACCEPT_DEFERRED) 2120 * retransmits */
2112 val = MAX_TCP_ACCEPT_DEFERRED; 2121 while (icsk->icsk_accept_queue.rskq_defer_accept < 32 &&
2113 icsk->icsk_accept_queue.rskq_defer_accept = val; 2122 val > ((TCP_TIMEOUT_INIT / HZ) <<
2123 icsk->icsk_accept_queue.rskq_defer_accept))
2124 icsk->icsk_accept_queue.rskq_defer_accept++;
2125 icsk->icsk_accept_queue.rskq_defer_accept++;
2114 } 2126 }
2115 break; 2127 break;
2116 2128
@@ -2292,7 +2304,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2292 val = (val ? : sysctl_tcp_fin_timeout) / HZ; 2304 val = (val ? : sysctl_tcp_fin_timeout) / HZ;
2293 break; 2305 break;
2294 case TCP_DEFER_ACCEPT: 2306 case TCP_DEFER_ACCEPT:
2295 val = icsk->icsk_accept_queue.rskq_defer_accept; 2307 val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 :
2308 ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1));
2296 break; 2309 break;
2297 case TCP_WINDOW_CLAMP: 2310 case TCP_WINDOW_CLAMP:
2298 val = tp->window_clamp; 2311 val = tp->window_clamp;
@@ -2609,7 +2622,7 @@ __setup("thash_entries=", set_thash_entries);
2609void __init tcp_init(void) 2622void __init tcp_init(void)
2610{ 2623{
2611 struct sk_buff *skb = NULL; 2624 struct sk_buff *skb = NULL;
2612 unsigned long limit; 2625 unsigned long nr_pages, limit;
2613 int order, i, max_share; 2626 int order, i, max_share;
2614 2627
2615 BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); 2628 BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
@@ -2678,8 +2691,9 @@ void __init tcp_init(void)
2678 * is up to 1/2 at 256 MB, decreasing toward zero with the amount of 2691 * is up to 1/2 at 256 MB, decreasing toward zero with the amount of
2679 * memory, with a floor of 128 pages. 2692 * memory, with a floor of 128 pages.
2680 */ 2693 */
2681 limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); 2694 nr_pages = totalram_pages - totalhigh_pages;
2682 limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); 2695 limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
2696 limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
2683 limit = max(limit, 128UL); 2697 limit = max(limit, 128UL);
2684 sysctl_tcp_mem[0] = limit / 4 * 3; 2698 sysctl_tcp_mem[0] = limit / 4 * 3;
2685 sysctl_tcp_mem[1] = limit; 2699 sysctl_tcp_mem[1] = limit;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b54d9d37b636..cad73b7dfef0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1392,9 +1392,9 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
1392 1392
1393 if (before(next_dup->start_seq, skip_to_seq)) { 1393 if (before(next_dup->start_seq, skip_to_seq)) {
1394 skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq, fack_count); 1394 skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq, fack_count);
1395 tcp_sacktag_walk(skb, sk, NULL, 1395 skb = tcp_sacktag_walk(skb, sk, NULL,
1396 next_dup->start_seq, next_dup->end_seq, 1396 next_dup->start_seq, next_dup->end_seq,
1397 1, fack_count, reord, flag); 1397 1, fack_count, reord, flag);
1398 } 1398 }
1399 1399
1400 return skb; 1400 return skb;
@@ -2483,6 +2483,20 @@ static inline void tcp_complete_cwr(struct sock *sk)
2483 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); 2483 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
2484} 2484}
2485 2485
2486static void tcp_try_keep_open(struct sock *sk)
2487{
2488 struct tcp_sock *tp = tcp_sk(sk);
2489 int state = TCP_CA_Open;
2490
2491 if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker)
2492 state = TCP_CA_Disorder;
2493
2494 if (inet_csk(sk)->icsk_ca_state != state) {
2495 tcp_set_ca_state(sk, state);
2496 tp->high_seq = tp->snd_nxt;
2497 }
2498}
2499
2486static void tcp_try_to_open(struct sock *sk, int flag) 2500static void tcp_try_to_open(struct sock *sk, int flag)
2487{ 2501{
2488 struct tcp_sock *tp = tcp_sk(sk); 2502 struct tcp_sock *tp = tcp_sk(sk);
@@ -2496,15 +2510,7 @@ static void tcp_try_to_open(struct sock *sk, int flag)
2496 tcp_enter_cwr(sk, 1); 2510 tcp_enter_cwr(sk, 1);
2497 2511
2498 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { 2512 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2499 int state = TCP_CA_Open; 2513 tcp_try_keep_open(sk);
2500
2501 if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker)
2502 state = TCP_CA_Disorder;
2503
2504 if (inet_csk(sk)->icsk_ca_state != state) {
2505 tcp_set_ca_state(sk, state);
2506 tp->high_seq = tp->snd_nxt;
2507 }
2508 tcp_moderate_cwnd(tp); 2514 tcp_moderate_cwnd(tp);
2509 } else { 2515 } else {
2510 tcp_cwnd_down(sk, flag); 2516 tcp_cwnd_down(sk, flag);
@@ -3310,8 +3316,11 @@ no_queue:
3310 return 1; 3316 return 1;
3311 3317
3312old_ack: 3318old_ack:
3313 if (TCP_SKB_CB(skb)->sacked) 3319 if (TCP_SKB_CB(skb)->sacked) {
3314 tcp_sacktag_write_queue(sk, skb, prior_snd_una); 3320 tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3321 if (icsk->icsk_ca_state == TCP_CA_Open)
3322 tcp_try_keep_open(sk);
3323 }
3315 3324
3316uninteresting_ack: 3325uninteresting_ack:
3317 SOCK_DEBUG(sk, "Ack %u out of %u:%u\n", ack, tp->snd_una, tp->snd_nxt); 3326 SOCK_DEBUG(sk, "Ack %u out of %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
@@ -4532,49 +4541,6 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
4532 } 4541 }
4533} 4542}
4534 4543
4535static int tcp_defer_accept_check(struct sock *sk)
4536{
4537 struct tcp_sock *tp = tcp_sk(sk);
4538
4539 if (tp->defer_tcp_accept.request) {
4540 int queued_data = tp->rcv_nxt - tp->copied_seq;
4541 int hasfin = !skb_queue_empty(&sk->sk_receive_queue) ?
4542 tcp_hdr((struct sk_buff *)
4543 sk->sk_receive_queue.prev)->fin : 0;
4544
4545 if (queued_data && hasfin)
4546 queued_data--;
4547
4548 if (queued_data &&
4549 tp->defer_tcp_accept.listen_sk->sk_state == TCP_LISTEN) {
4550 if (sock_flag(sk, SOCK_KEEPOPEN)) {
4551 inet_csk_reset_keepalive_timer(sk,
4552 keepalive_time_when(tp));
4553 } else {
4554 inet_csk_delete_keepalive_timer(sk);
4555 }
4556
4557 inet_csk_reqsk_queue_add(
4558 tp->defer_tcp_accept.listen_sk,
4559 tp->defer_tcp_accept.request,
4560 sk);
4561
4562 tp->defer_tcp_accept.listen_sk->sk_data_ready(
4563 tp->defer_tcp_accept.listen_sk, 0);
4564
4565 sock_put(tp->defer_tcp_accept.listen_sk);
4566 sock_put(sk);
4567 tp->defer_tcp_accept.listen_sk = NULL;
4568 tp->defer_tcp_accept.request = NULL;
4569 } else if (hasfin ||
4570 tp->defer_tcp_accept.listen_sk->sk_state != TCP_LISTEN) {
4571 tcp_reset(sk);
4572 return -1;
4573 }
4574 }
4575 return 0;
4576}
4577
4578static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) 4544static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
4579{ 4545{
4580 struct tcp_sock *tp = tcp_sk(sk); 4546 struct tcp_sock *tp = tcp_sk(sk);
@@ -4935,8 +4901,6 @@ step5:
4935 4901
4936 tcp_data_snd_check(sk); 4902 tcp_data_snd_check(sk);
4937 tcp_ack_snd_check(sk); 4903 tcp_ack_snd_check(sk);
4938
4939 tcp_defer_accept_check(sk);
4940 return 0; 4904 return 0;
4941 4905
4942csum_error: 4906csum_error:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index cd601a866c2f..ffe869ac1bcf 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -85,10 +85,6 @@
85int sysctl_tcp_tw_reuse __read_mostly; 85int sysctl_tcp_tw_reuse __read_mostly;
86int sysctl_tcp_low_latency __read_mostly; 86int sysctl_tcp_low_latency __read_mostly;
87 87
88/* Check TCP sequence numbers in ICMP packets. */
89#define ICMP_MIN_LENGTH 8
90
91void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
92 88
93#ifdef CONFIG_TCP_MD5SIG 89#ifdef CONFIG_TCP_MD5SIG
94static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, 90static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
@@ -1285,7 +1281,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1285 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) 1281 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1286 goto drop; 1282 goto drop;
1287 1283
1288 req = reqsk_alloc(&tcp_request_sock_ops); 1284 req = inet_reqsk_alloc(&tcp_request_sock_ops);
1289 if (!req) 1285 if (!req)
1290 goto drop; 1286 goto drop;
1291 1287
@@ -1918,14 +1914,6 @@ int tcp_v4_destroy_sock(struct sock *sk)
1918 sk->sk_sndmsg_page = NULL; 1914 sk->sk_sndmsg_page = NULL;
1919 } 1915 }
1920 1916
1921 if (tp->defer_tcp_accept.request) {
1922 reqsk_free(tp->defer_tcp_accept.request);
1923 sock_put(tp->defer_tcp_accept.listen_sk);
1924 sock_put(sk);
1925 tp->defer_tcp_accept.listen_sk = NULL;
1926 tp->defer_tcp_accept.request = NULL;
1927 }
1928
1929 atomic_dec(&tcp_sockets_allocated); 1917 atomic_dec(&tcp_sockets_allocated);
1930 1918
1931 return 0; 1919 return 0;
@@ -2303,7 +2291,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2303 } 2291 }
2304 2292
2305 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " 2293 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2306 "%08X %5d %8d %lu %d %p %u %u %u %u %d%n", 2294 "%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n",
2307 i, src, srcp, dest, destp, sk->sk_state, 2295 i, src, srcp, dest, destp, sk->sk_state,
2308 tp->write_seq - tp->snd_una, 2296 tp->write_seq - tp->snd_una,
2309 sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog : 2297 sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog :
@@ -2315,8 +2303,8 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2315 icsk->icsk_probes_out, 2303 icsk->icsk_probes_out,
2316 sock_i_ino(sk), 2304 sock_i_ino(sk),
2317 atomic_read(&sk->sk_refcnt), sk, 2305 atomic_read(&sk->sk_refcnt), sk,
2318 icsk->icsk_rto, 2306 jiffies_to_clock_t(icsk->icsk_rto),
2319 icsk->icsk_ack.ato, 2307 jiffies_to_clock_t(icsk->icsk_ack.ato),
2320 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, 2308 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2321 tp->snd_cwnd, 2309 tp->snd_cwnd,
2322 tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh, 2310 tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 019c8c16e5cc..8245247a6ceb 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -571,8 +571,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
571 does sequence test, SYN is truncated, and thus we consider 571 does sequence test, SYN is truncated, and thus we consider
572 it a bare ACK. 572 it a bare ACK.
573 573
574 Both ends (listening sockets) accept the new incoming 574 If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this
575 connection and try to talk to each other. 8-) 575 bare ACK. Otherwise, we create an established connection. Both
576 ends (listening sockets) accept the new incoming connection and try
577 to talk to each other. 8-)
576 578
577 Note: This case is both harmless, and rare. Possibility is about the 579 Note: This case is both harmless, and rare. Possibility is about the
578 same as us discovering intelligent life on another plant tomorrow. 580 same as us discovering intelligent life on another plant tomorrow.
@@ -640,6 +642,13 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
640 if (!(flg & TCP_FLAG_ACK)) 642 if (!(flg & TCP_FLAG_ACK))
641 return NULL; 643 return NULL;
642 644
645 /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
646 if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
647 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
648 inet_rsk(req)->acked = 1;
649 return NULL;
650 }
651
643 /* OK, ACK is valid, create big socket and 652 /* OK, ACK is valid, create big socket and
644 * feed this segment to it. It will repeat all 653 * feed this segment to it. It will repeat all
645 * the tests. THIS SEGMENT MUST MOVE SOCKET TO 654 * the tests. THIS SEGMENT MUST MOVE SOCKET TO
@@ -678,24 +687,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
678 inet_csk_reqsk_queue_unlink(sk, req, prev); 687 inet_csk_reqsk_queue_unlink(sk, req, prev);
679 inet_csk_reqsk_queue_removed(sk, req); 688 inet_csk_reqsk_queue_removed(sk, req);
680 689
681 if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && 690 inet_csk_reqsk_queue_add(sk, req, child);
682 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
683
684 /* the accept queue handling is done is est recv slow
685 * path so lets make sure to start there
686 */
687 tcp_sk(child)->pred_flags = 0;
688 sock_hold(sk);
689 sock_hold(child);
690 tcp_sk(child)->defer_tcp_accept.listen_sk = sk;
691 tcp_sk(child)->defer_tcp_accept.request = req;
692
693 inet_csk_reset_keepalive_timer(child,
694 inet_csk(sk)->icsk_accept_queue.rskq_defer_accept * HZ);
695 } else {
696 inet_csk_reqsk_queue_add(sk, req, child);
697 }
698
699 return child; 691 return child;
700 692
701 listen_overflow: 693 listen_overflow:
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index debf23581606..ad993ecb4810 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1836,7 +1836,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1836{ 1836{
1837 struct tcp_sock *tp = tcp_sk(sk); 1837 struct tcp_sock *tp = tcp_sk(sk);
1838 struct inet_connection_sock *icsk = inet_csk(sk); 1838 struct inet_connection_sock *icsk = inet_csk(sk);
1839 unsigned int cur_mss = tcp_current_mss(sk, 0); 1839 unsigned int cur_mss;
1840 int err; 1840 int err;
1841 1841
1842 /* Inconslusive MTU probe */ 1842 /* Inconslusive MTU probe */
@@ -1858,6 +1858,11 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1858 return -ENOMEM; 1858 return -ENOMEM;
1859 } 1859 }
1860 1860
1861 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
1862 return -EHOSTUNREACH; /* Routing failure or similar. */
1863
1864 cur_mss = tcp_current_mss(sk, 0);
1865
1861 /* If receiver has shrunk his window, and skb is out of 1866 /* If receiver has shrunk his window, and skb is out of
1862 * new window, do not retransmit it. The exception is the 1867 * new window, do not retransmit it. The exception is the
1863 * case, when window is shrunk to zero. In this case 1868 * case, when window is shrunk to zero. In this case
@@ -1884,9 +1889,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1884 (sysctl_tcp_retrans_collapse != 0)) 1889 (sysctl_tcp_retrans_collapse != 0))
1885 tcp_retrans_try_collapse(sk, skb, cur_mss); 1890 tcp_retrans_try_collapse(sk, skb, cur_mss);
1886 1891
1887 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
1888 return -EHOSTUNREACH; /* Routing failure or similar. */
1889
1890 /* Some Solaris stacks overoptimize and ignore the FIN on a 1892 /* Some Solaris stacks overoptimize and ignore the FIN on a
1891 * retransmit when old data is attached. So strip it off 1893 * retransmit when old data is attached. So strip it off
1892 * since it is cheap to do so and saves bytes on the network. 1894 * since it is cheap to do so and saves bytes on the network.
@@ -2129,6 +2131,8 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2129 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2131 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2130 if (tcp_transmit_skb(sk, skb, 0, priority)) 2132 if (tcp_transmit_skb(sk, skb, 0, priority))
2131 NET_INC_STATS(LINUX_MIB_TCPABORTFAILED); 2133 NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
2134
2135 TCP_INC_STATS(TCP_MIB_OUTRSTS);
2132} 2136}
2133 2137
2134/* WARNING: This routine must only be called when we have already sent 2138/* WARNING: This routine must only be called when we have already sent
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 4de68cf5f2aa..63ed9d6830e7 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -489,11 +489,6 @@ static void tcp_keepalive_timer (unsigned long data)
489 goto death; 489 goto death;
490 } 490 }
491 491
492 if (tp->defer_tcp_accept.request && sk->sk_state == TCP_ESTABLISHED) {
493 tcp_send_active_reset(sk, GFP_ATOMIC);
494 goto death;
495 }
496
497 if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE) 492 if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
498 goto out; 493 goto out;
499 494
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index d3b709a6f264..cb1f0e83830b 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -97,7 +97,7 @@ static int tunnel64_rcv(struct sk_buff *skb)
97{ 97{
98 struct xfrm_tunnel *handler; 98 struct xfrm_tunnel *handler;
99 99
100 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 100 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
101 goto drop; 101 goto drop;
102 102
103 for (handler = tunnel64_handlers; handler; handler = handler->next) 103 for (handler = tunnel64_handlers; handler; handler = handler->next)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index db1cb7c96d63..56fcda3694ba 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -420,7 +420,7 @@ void udp_err(struct sk_buff *skb, u32 info)
420/* 420/*
421 * Throw away all pending data and cancel the corking. Socket is locked. 421 * Throw away all pending data and cancel the corking. Socket is locked.
422 */ 422 */
423static void udp_flush_pending_frames(struct sock *sk) 423void udp_flush_pending_frames(struct sock *sk)
424{ 424{
425 struct udp_sock *up = udp_sk(sk); 425 struct udp_sock *up = udp_sk(sk);
426 426
@@ -430,6 +430,7 @@ static void udp_flush_pending_frames(struct sock *sk)
430 ip_flush_pending_frames(sk); 430 ip_flush_pending_frames(sk);
431 } 431 }
432} 432}
433EXPORT_SYMBOL(udp_flush_pending_frames);
433 434
434/** 435/**
435 * udp4_hwcsum_outgoing - handle outgoing HW checksumming 436 * udp4_hwcsum_outgoing - handle outgoing HW checksumming
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 584e6d74e3a9..7135279f3f84 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -52,7 +52,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
52 IP_ECN_clear(top_iph); 52 IP_ECN_clear(top_iph);
53 53
54 top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? 54 top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
55 0 : XFRM_MODE_SKB_CB(skb)->frag_off; 55 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));
56 ip_select_ident(top_iph, dst->child, NULL); 56 ip_select_ident(top_iph, dst->child, NULL);
57 57
58 top_iph->ttl = dst_metric(dst->child, RTAX_HOPLIMIT); 58 top_iph->ttl = dst_metric(dst->child, RTAX_HOPLIMIT);