Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/devinet.c                       9
-rw-r--r--  net/ipv4/fib_frontend.c                  1
-rw-r--r--  net/ipv4/fib_semantics.c                 5
-rw-r--r--  net/ipv4/inet_connection_sock.c         11
-rw-r--r--  net/ipv4/inet_fragment.c                16
-rw-r--r--  net/ipv4/inet_lro.c                      3
-rw-r--r--  net/ipv4/ip_fragment.c                   2
-rw-r--r--  net/ipv4/netfilter/nf_nat_core.c         3
-rw-r--r--  net/ipv4/netfilter/nf_nat_snmp_basic.c  14
-rw-r--r--  net/ipv4/raw.c                          11
-rw-r--r--  net/ipv4/route.c                         2
-rw-r--r--  net/ipv4/syncookies.c                    3
-rw-r--r--  net/ipv4/tcp.c                          36
-rw-r--r--  net/ipv4/tcp_input.c                    80
-rw-r--r--  net/ipv4/tcp_ipv4.c                     20
-rw-r--r--  net/ipv4/tcp_minisocks.c                32
-rw-r--r--  net/ipv4/tcp_output.c                    2
-rw-r--r--  net/ipv4/tcp_timer.c                     5
-rw-r--r--  net/ipv4/tunnel4.c                       2
-rw-r--r--  net/ipv4/udp.c                           3
-rw-r--r--  net/ipv4/xfrm4_mode_tunnel.c             2

21 files changed, 122 insertions(+), 140 deletions(-)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 6848e4760f34..79a7ef6209ff 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -90,7 +90,6 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
 	[IFA_LOCAL]     	= { .type = NLA_U32 },
 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
-	[IFA_ANYCAST]   	= { .type = NLA_U32 },
 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
 };
 
@@ -536,9 +535,6 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
 	if (tb[IFA_BROADCAST])
 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
 
-	if (tb[IFA_ANYCAST])
-		ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
-
 	if (tb[IFA_LABEL])
 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
 	else
@@ -745,7 +741,6 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 			break;
 		inet_del_ifa(in_dev, ifap, 0);
 		ifa->ifa_broadcast = 0;
-		ifa->ifa_anycast = 0;
 		ifa->ifa_scope = 0;
 	}
 
@@ -1113,7 +1108,6 @@ static inline size_t inet_nlmsg_size(void)
 	       + nla_total_size(4) /* IFA_ADDRESS */
 	       + nla_total_size(4) /* IFA_LOCAL */
 	       + nla_total_size(4) /* IFA_BROADCAST */
-	       + nla_total_size(4) /* IFA_ANYCAST */
 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
 }
 
@@ -1143,9 +1137,6 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
 	if (ifa->ifa_broadcast)
 		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
 
-	if (ifa->ifa_anycast)
-		NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
-
 	if (ifa->ifa_label[0])
 		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
 
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 0f1557a4ac7a..0b2ac6a3d903 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -506,7 +506,6 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
 	[RTA_PREFSRC]		= { .type = NLA_U32 },
 	[RTA_METRICS]		= { .type = NLA_NESTED },
 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
-	[RTA_PROTOINFO]		= { .type = NLA_U32 },
 	[RTA_FLOW]		= { .type = NLA_U32 },
 };
 
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 3b83c34019fc..0d4d72827e4b 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -960,7 +960,10 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 	rtm->rtm_dst_len = dst_len;
 	rtm->rtm_src_len = 0;
 	rtm->rtm_tos = tos;
-	rtm->rtm_table = tb_id;
+	if (tb_id < 256)
+		rtm->rtm_table = tb_id;
+	else
+		rtm->rtm_table = RT_TABLE_COMPAT;
 	NLA_PUT_U32(skb, RTA_TABLE, tb_id);
 	rtm->rtm_type = type;
 	rtm->rtm_flags = fi->fib_flags;
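
Note on the hunk above: rtm_table is a u8 in struct rtmsg, so table ids of 256
and up cannot be carried there; the dump now puts RT_TABLE_COMPAT in the fixed
header and the real id in the RTA_TABLE attribute. A consumer-side sketch of
how a netlink dumper might read this back (hypothetical helper, not a library
API):

    #include <linux/rtnetlink.h>
    #include <stdint.h>

    /* Prefer the RTA_TABLE attribute when present; the rtm_table byte
     * only holds ids up to 255 or the RT_TABLE_COMPAT marker. */
    static uint32_t route_table_id(const struct rtmsg *rtm,
                                   const struct rtattr *rta_table)
    {
        if (rta_table)
            return *(const uint32_t *)RTA_DATA(rta_table);
        return rtm->rtm_table;
    }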
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 828ea211ff21..ec834480abe7 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -419,7 +419,8 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 	struct inet_connection_sock *icsk = inet_csk(parent);
 	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
 	struct listen_sock *lopt = queue->listen_opt;
-	int thresh = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
+	int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
+	int thresh = max_retries;
 	unsigned long now = jiffies;
 	struct request_sock **reqp, *req;
 	int i, budget;
@@ -455,6 +456,9 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 		}
 	}
 
+	if (queue->rskq_defer_accept)
+		max_retries = queue->rskq_defer_accept;
+
 	budget = 2 * (lopt->nr_table_entries / (timeout / interval));
 	i = lopt->clock_hand;
 
@@ -462,8 +466,9 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 		reqp=&lopt->syn_table[i];
 		while ((req = *reqp) != NULL) {
 			if (time_after_eq(now, req->expires)) {
-				if (req->retrans < thresh &&
-				    !req->rsk_ops->rtx_syn_ack(parent, req)) {
+				if ((req->retrans < thresh ||
+				     (inet_rsk(req)->acked && req->retrans < max_retries))
+				    && !req->rsk_ops->rtx_syn_ack(parent, req)) {
 					unsigned long timeo;
 
 					if (req->retrans++ == 0)
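
Note on the pruning change above: thresh keeps its old meaning for requests
still waiting for the handshake ACK, while requests whose bare ACK was already
consumed by TCP_DEFER_ACCEPT (inet_rsk(req)->acked) survive up to max_retries,
which the defer-accept setting overrides. A minimal sketch of the resulting
keep-or-drop predicate (illustrative names, not kernel API):

    /* Keep re-sending the SYN-ACK while either budget lasts. */
    static int keep_request(int retrans, int acked, int thresh, int max_retries)
    {
        return retrans < thresh || (acked && retrans < max_retries);
    }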
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 4ed429bd5951..0546a0bc97ea 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -192,14 +192,21 @@ EXPORT_SYMBOL(inet_frag_evictor);
 
 static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
 		struct inet_frag_queue *qp_in, struct inet_frags *f,
-		unsigned int hash, void *arg)
+		void *arg)
 {
 	struct inet_frag_queue *qp;
 #ifdef CONFIG_SMP
 	struct hlist_node *n;
 #endif
+	unsigned int hash;
 
 	write_lock(&f->lock);
+	/*
+	 * While we stayed w/o the lock other CPU could update
+	 * the rnd seed, so we need to re-calculate the hash
+	 * chain. Fortunatelly the qp_in can be used to get one.
+	 */
+	hash = f->hashfn(qp_in);
 #ifdef CONFIG_SMP
 	/* With SMP race we have to recheck hash table, because
 	 * such entry could be created on other cpu, while we
@@ -247,7 +254,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
 }
 
 static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
-		struct inet_frags *f, void *arg, unsigned int hash)
+		struct inet_frags *f, void *arg)
 {
 	struct inet_frag_queue *q;
 
@@ -255,7 +262,7 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
 	if (q == NULL)
 		return NULL;
 
-	return inet_frag_intern(nf, q, f, hash, arg);
+	return inet_frag_intern(nf, q, f, arg);
 }
 
 struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
@@ -264,7 +271,6 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
 	struct inet_frag_queue *q;
 	struct hlist_node *n;
 
-	read_lock(&f->lock);
 	hlist_for_each_entry(q, n, &f->hash[hash], list) {
 		if (q->net == nf && f->match(q, key)) {
 			atomic_inc(&q->refcnt);
@@ -274,6 +280,6 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
 	}
 	read_unlock(&f->lock);
 
-	return inet_frag_create(nf, f, key, hash);
+	return inet_frag_create(nf, f, key);
 }
 EXPORT_SYMBOL(inet_frag_find);
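
Note on the two fragment hunks (here and in ip_fragment.c below): the hash
depends on a random seed that a rehash timer may change, so a hash computed
outside f->lock can point at the wrong chain by the time the lock is taken.
The fix computes the hash only while the lock is held. A userspace analogue of
the pattern, assuming a pthread rwlock in place of the kernel lock:

    #include <pthread.h>
    #include <stdint.h>

    static pthread_rwlock_t tbl_lock = PTHREAD_RWLOCK_INITIALIZER;
    static uint32_t secret;          /* reseeded periodically, under tbl_lock */
    static void *table[64];

    static unsigned int hashfn(uint32_t key)
    {
        return (key ^ secret) & 63;  /* only valid while tbl_lock is held */
    }

    static void intern(uint32_t key, void *obj)
    {
        pthread_rwlock_wrlock(&tbl_lock);
        /* recompute under the lock: a hash taken earlier could be stale
         * if 'secret' changed in between */
        table[hashfn(key)] = obj;
        pthread_rwlock_unlock(&tbl_lock);
    }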
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
index 4a4d49fca1f2..cfd034a2b96e 100644
--- a/net/ipv4/inet_lro.c
+++ b/net/ipv4/inet_lro.c
@@ -383,8 +383,7 @@ static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
 out2: /* send aggregated SKBs to stack */
 	lro_flush(lro_mgr, lro_desc);
 
-out: /* Original SKB has to be posted to stack */
-	skb->ip_summed = lro_mgr->ip_summed;
+out:
 	return 1;
 }
 
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index cd6ce6ac6358..37221f659159 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -229,6 +229,8 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
 
 	arg.iph = iph;
 	arg.user = user;
+
+	read_lock(&ip4_frags.lock);
 	hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
 
 	q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 04578593e100..d2a887fc8d9b 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -556,7 +556,6 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
 
 	spin_lock_bh(&nf_nat_lock);
 	hlist_del_rcu(&nat->bysource);
-	nat->ct = NULL;
 	spin_unlock_bh(&nf_nat_lock);
 }
 
@@ -570,8 +569,8 @@ static void nf_nat_move_storage(void *new, void *old)
 		return;
 
 	spin_lock_bh(&nf_nat_lock);
-	hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource);
 	new_nat->ct = ct;
+	hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource);
 	spin_unlock_bh(&nf_nat_lock);
 }
 
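
Note on the reordering above: new_nat->ct must be set before
hlist_replace_rcu() makes new_nat reachable to lockless readers, otherwise a
reader can observe the entry with a NULL ct. The same publish pattern in
portable C11 atomics (an analogue, not the kernel RCU primitives):

    #include <stdatomic.h>

    struct node { int payload; };

    /* Initialize first, then publish with release ordering so readers
     * that see the pointer also see the payload. */
    static void publish(struct node *_Atomic *slot, struct node *n, int payload)
    {
        n->payload = payload;
        atomic_store_explicit(slot, n, memory_order_release);
    }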
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 5daefad3d193..7750c97fde7b 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -232,6 +232,11 @@ static unsigned char asn1_length_decode(struct asn1_ctx *ctx,
 			}
 		}
 	}
+
+	/* don't trust len bigger than ctx buffer */
+	if (*len > ctx->end - ctx->pointer)
+		return 0;
+
 	return 1;
 }
 
@@ -250,6 +255,10 @@ static unsigned char asn1_header_decode(struct asn1_ctx *ctx,
 	if (!asn1_length_decode(ctx, &def, &len))
 		return 0;
 
+	/* primitive shall be definite, indefinite shall be constructed */
+	if (*con == ASN1_PRI && !def)
+		return 0;
+
 	if (def)
 		*eoc = ctx->pointer + len;
 	else
@@ -434,6 +443,11 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
 	unsigned long *optr;
 
 	size = eoc - ctx->pointer + 1;
+
+	/* first subid actually encodes first two subids */
+	if (size < 2 || size > ULONG_MAX/sizeof(unsigned long))
+		return 0;
+
 	*oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
 	if (*oid == NULL) {
 		if (net_ratelimit())
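
Note on the third hunk above: without the bound on size, the multiplication in
kmalloc(size * sizeof(unsigned long), ...) can wrap and allocate a buffer far
smaller than the decoder then writes into. The guard in plain userspace C, as
a sketch of the pattern:

    #include <limits.h>
    #include <stdlib.h>

    /* Reject before multiplying, so the product cannot overflow. */
    static void *alloc_subids(unsigned long size)
    {
        if (size < 2 || size > ULONG_MAX / sizeof(unsigned long))
            return NULL;
        return malloc(size * sizeof(unsigned long));
    }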
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index fead049daf43..37a1ecd9d600 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -608,6 +608,14 @@ static void raw_close(struct sock *sk, long timeout)
 	sk_common_release(sk);
 }
 
+static int raw_destroy(struct sock *sk)
+{
+	lock_sock(sk);
+	ip_flush_pending_frames(sk);
+	release_sock(sk);
+	return 0;
+}
+
 /* This gets rid of all the nasties in af_inet. -DaveM */
 static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
@@ -820,6 +828,7 @@ struct proto raw_prot = {
 	.name		   = "RAW",
 	.owner		   = THIS_MODULE,
 	.close		   = raw_close,
+	.destroy	   = raw_destroy,
 	.connect	   = ip4_datagram_connect,
 	.disconnect	   = udp_disconnect,
 	.ioctl		   = raw_ioctl,
@@ -925,7 +934,7 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
 	srcp  = inet->num;
 
 	seq_printf(seq, "%4d: %08X:%04X %08X:%04X"
-		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d",
+		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n",
 		i, src, srcp, dest, destp, sp->sk_state,
 		atomic_read(&sp->sk_wmem_alloc),
 		atomic_read(&sp->sk_rmem_alloc),
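
Note on raw_destroy above: a sender can leave a partially built datagram
corked on a raw socket with MSG_MORE; without a .destroy hook, close() never
flushed it. A sketch of the triggering sequence (assumed from the patch;
needs CAP_NET_RAW, error handling elided):

    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <sys/socket.h>
    #include <unistd.h>

    int main(void)
    {
        int fd = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP);
        struct sockaddr_in dst = { .sin_family = AF_INET };
        char byte = 0;

        dst.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
        /* MSG_MORE corks the datagram instead of sending it */
        sendto(fd, &byte, 1, MSG_MORE, (struct sockaddr *)&dst, sizeof(dst));
        return close(fd);   /* raw_destroy() now flushes the pending frame */
    }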
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index df41026b60db..96be336064fb 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1792,7 +1792,7 @@ static int __mkroute_input(struct sk_buff *skb,
 	if (err)
 		flags |= RTCF_DIRECTSRC;
 
-	if (out_dev == in_dev && err && !(flags & RTCF_MASQ) &&
+	if (out_dev == in_dev && err &&
 	    (IN_DEV_SHARED_MEDIA(out_dev) ||
 	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
 		flags |= RTCF_DOREDIRECT;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 73ba98921d64..d182a2a26291 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -285,7 +285,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	cookie_check_timestamp(&tcp_opt);
 
 	ret = NULL;
-	req = reqsk_alloc(&tcp_request_sock_ops); /* for safety */
+	req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */
 	if (!req)
 		goto out;
 
@@ -301,7 +301,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	ireq->rmt_port		= th->source;
 	ireq->loc_addr		= ip_hdr(skb)->daddr;
 	ireq->rmt_addr		= ip_hdr(skb)->saddr;
-	ireq->opt		= NULL;
 	ireq->snd_wscale	= tcp_opt.snd_wscale;
 	ireq->rcv_wscale	= tcp_opt.rcv_wscale;
 	ireq->sack_ok		= tcp_opt.sack_ok;
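
Note on the two hunks above: the explicit ireq->opt = NULL becomes redundant
because the new allocator clears it. A sketch of what inet_reqsk_alloc() is
assumed to do, inferred from this diff rather than quoted from the header:

    static inline struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops)
    {
        struct request_sock *req = reqsk_alloc(ops);

        if (req != NULL)
            inet_rsk(req)->opt = NULL;
        return req;
    }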
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f88653138621..850825dc86e6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -260,6 +260,8 @@
 #include <linux/socket.h>
 #include <linux/random.h>
 #include <linux/bootmem.h>
+#include <linux/highmem.h>
+#include <linux/swap.h>
 #include <linux/cache.h>
 #include <linux/err.h>
 #include <linux/crypto.h>
@@ -1227,7 +1229,14 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 			copied += used;
 			offset += used;
 		}
-		if (offset != skb->len)
+		/*
+		 * If recv_actor drops the lock (e.g. TCP splice
+		 * receive) the skb pointer might be invalid when
+		 * getting here: tcp_collapse might have deleted it
+		 * while aggregating skbs from the socket queue.
+		 */
+		skb = tcp_recv_skb(sk, seq-1, &offset);
+		if (!skb || (offset+1 != skb->len))
 			break;
 	}
 	if (tcp_hdr(skb)->fin) {
@@ -2105,12 +2114,15 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		break;
 
 	case TCP_DEFER_ACCEPT:
-		if (val < 0) {
-			err = -EINVAL;
-		} else {
-			if (val > MAX_TCP_ACCEPT_DEFERRED)
-				val = MAX_TCP_ACCEPT_DEFERRED;
-			icsk->icsk_accept_queue.rskq_defer_accept = val;
+		icsk->icsk_accept_queue.rskq_defer_accept = 0;
+		if (val > 0) {
+			/* Translate value in seconds to number of
+			 * retransmits */
+			while (icsk->icsk_accept_queue.rskq_defer_accept < 32 &&
+			       val > ((TCP_TIMEOUT_INIT / HZ) <<
+				      icsk->icsk_accept_queue.rskq_defer_accept))
+				icsk->icsk_accept_queue.rskq_defer_accept++;
+			icsk->icsk_accept_queue.rskq_defer_accept++;
 		}
 		break;
 
@@ -2292,7 +2304,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 		val = (val ? : sysctl_tcp_fin_timeout) / HZ;
 		break;
 	case TCP_DEFER_ACCEPT:
-		val = icsk->icsk_accept_queue.rskq_defer_accept;
+		val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 :
+			((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1));
 		break;
 	case TCP_WINDOW_CLAMP:
 		val = tp->window_clamp;
@@ -2609,7 +2622,7 @@ __setup("thash_entries=", set_thash_entries);
 void __init tcp_init(void)
 {
 	struct sk_buff *skb = NULL;
-	unsigned long limit;
+	unsigned long nr_pages, limit;
 	int order, i, max_share;
 
 	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
@@ -2678,8 +2691,9 @@ void __init tcp_init(void)
 	 * is up to 1/2 at 256 MB, decreasing toward zero with the amount of
 	 * memory, with a floor of 128 pages.
 	 */
-	limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
-	limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
+	nr_pages = totalram_pages - totalhigh_pages;
+	limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
+	limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
 	limit = max(limit, 128UL);
 	sysctl_tcp_mem[0] = limit / 4 * 3;
 	sysctl_tcp_mem[1] = limit;
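
Note on the TCP_DEFER_ACCEPT hunks above: the option is still set in seconds,
but the kernel now rounds it up to a count of SYN-ACK retransmits, so
getsockopt() returns the effective window rather than the value passed in.
With this tree's TCP_TIMEOUT_INIT of 3 s (an assumption about the era, not
visible in the diff), setting 30 s reads back as 48 s:

    #include <netinet/in.h>
    #include <netinet/tcp.h>
    #include <stdio.h>
    #include <sys/socket.h>

    int main(void)
    {
        int fd = socket(AF_INET, SOCK_STREAM, 0);
        int val = 30;
        socklen_t len = sizeof(val);

        setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &val, sizeof(val));
        getsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &val, &len);
        printf("effective defer window: %d s\n", val);  /* 48: 3 << (5 - 1) */
        return 0;
    }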
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b54d9d37b636..cad73b7dfef0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1392,9 +1392,9 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
 
 	if (before(next_dup->start_seq, skip_to_seq)) {
 		skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq, fack_count);
-		tcp_sacktag_walk(skb, sk, NULL,
-				 next_dup->start_seq, next_dup->end_seq,
-				 1, fack_count, reord, flag);
+		skb = tcp_sacktag_walk(skb, sk, NULL,
+				       next_dup->start_seq, next_dup->end_seq,
+				       1, fack_count, reord, flag);
 	}
 
 	return skb;
@@ -2483,6 +2483,20 @@ static inline void tcp_complete_cwr(struct sock *sk)
 	tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
 }
 
+static void tcp_try_keep_open(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int state = TCP_CA_Open;
+
+	if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker)
+		state = TCP_CA_Disorder;
+
+	if (inet_csk(sk)->icsk_ca_state != state) {
+		tcp_set_ca_state(sk, state);
+		tp->high_seq = tp->snd_nxt;
+	}
+}
+
 static void tcp_try_to_open(struct sock *sk, int flag)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -2496,15 +2510,7 @@ static void tcp_try_to_open(struct sock *sk, int flag)
 		tcp_enter_cwr(sk, 1);
 
 	if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
-		int state = TCP_CA_Open;
-
-		if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker)
-			state = TCP_CA_Disorder;
-
-		if (inet_csk(sk)->icsk_ca_state != state) {
-			tcp_set_ca_state(sk, state);
-			tp->high_seq = tp->snd_nxt;
-		}
+		tcp_try_keep_open(sk);
 		tcp_moderate_cwnd(tp);
 	} else {
 		tcp_cwnd_down(sk, flag);
@@ -3310,8 +3316,11 @@ no_queue:
 	return 1;
 
 old_ack:
-	if (TCP_SKB_CB(skb)->sacked)
+	if (TCP_SKB_CB(skb)->sacked) {
 		tcp_sacktag_write_queue(sk, skb, prior_snd_una);
+		if (icsk->icsk_ca_state == TCP_CA_Open)
+			tcp_try_keep_open(sk);
+	}
 
 uninteresting_ack:
 	SOCK_DEBUG(sk, "Ack %u out of %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
@@ -4532,49 +4541,6 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
 	}
 }
 
-static int tcp_defer_accept_check(struct sock *sk)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	if (tp->defer_tcp_accept.request) {
-		int queued_data = tp->rcv_nxt - tp->copied_seq;
-		int hasfin = !skb_queue_empty(&sk->sk_receive_queue) ?
-			tcp_hdr((struct sk_buff *)
-				sk->sk_receive_queue.prev)->fin : 0;
-
-		if (queued_data && hasfin)
-			queued_data--;
-
-		if (queued_data &&
-		    tp->defer_tcp_accept.listen_sk->sk_state == TCP_LISTEN) {
-			if (sock_flag(sk, SOCK_KEEPOPEN)) {
-				inet_csk_reset_keepalive_timer(sk,
-					keepalive_time_when(tp));
-			} else {
-				inet_csk_delete_keepalive_timer(sk);
-			}
-
-			inet_csk_reqsk_queue_add(
-				tp->defer_tcp_accept.listen_sk,
-				tp->defer_tcp_accept.request,
-				sk);
-
-			tp->defer_tcp_accept.listen_sk->sk_data_ready(
-				tp->defer_tcp_accept.listen_sk, 0);
-
-			sock_put(tp->defer_tcp_accept.listen_sk);
-			sock_put(sk);
-			tp->defer_tcp_accept.listen_sk = NULL;
-			tp->defer_tcp_accept.request = NULL;
-		} else if (hasfin ||
-			   tp->defer_tcp_accept.listen_sk->sk_state != TCP_LISTEN) {
-			tcp_reset(sk);
-			return -1;
-		}
-	}
-	return 0;
-}
-
 static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -4935,8 +4901,6 @@ step5:
 
 	tcp_data_snd_check(sk);
 	tcp_ack_snd_check(sk);
-
-	tcp_defer_accept_check(sk);
 	return 0;
 
 csum_error:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index cd601a866c2f..ffe869ac1bcf 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -85,10 +85,6 @@
 int sysctl_tcp_tw_reuse __read_mostly;
 int sysctl_tcp_low_latency __read_mostly;
 
-/* Check TCP sequence numbers in ICMP packets. */
-#define ICMP_MIN_LENGTH 8
-
-void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
 
 #ifdef CONFIG_TCP_MD5SIG
 static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
@@ -1285,7 +1281,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
 		goto drop;
 
-	req = reqsk_alloc(&tcp_request_sock_ops);
+	req = inet_reqsk_alloc(&tcp_request_sock_ops);
 	if (!req)
 		goto drop;
 
@@ -1918,14 +1914,6 @@ int tcp_v4_destroy_sock(struct sock *sk)
 		sk->sk_sndmsg_page = NULL;
 	}
 
-	if (tp->defer_tcp_accept.request) {
-		reqsk_free(tp->defer_tcp_accept.request);
-		sock_put(tp->defer_tcp_accept.listen_sk);
-		sock_put(sk);
-		tp->defer_tcp_accept.listen_sk = NULL;
-		tp->defer_tcp_accept.request = NULL;
-	}
-
 	atomic_dec(&tcp_sockets_allocated);
 
 	return 0;
@@ -2303,7 +2291,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
 	}
 
 	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
-			"%08X %5d %8d %lu %d %p %u %u %u %u %d%n",
+			"%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n",
 		i, src, srcp, dest, destp, sk->sk_state,
 		tp->write_seq - tp->snd_una,
 		sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog :
@@ -2315,8 +2303,8 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
 		icsk->icsk_probes_out,
 		sock_i_ino(sk),
 		atomic_read(&sk->sk_refcnt), sk,
-		icsk->icsk_rto,
-		icsk->icsk_ack.ato,
+		jiffies_to_clock_t(icsk->icsk_rto),
+		jiffies_to_clock_t(icsk->icsk_ack.ato),
 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
 		tp->snd_cwnd,
 		tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh,
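
Note on the seq_printf hunks above: the rto and ato columns of /proc/net/tcp
are now emitted via jiffies_to_clock_t(), i.e. in USER_HZ ticks instead of raw
jiffies, so readers no longer need to guess the kernel's internal HZ.
Consumer-side conversion sketch:

    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        unsigned long rto_ticks = 75;         /* field parsed from /proc/net/tcp */
        long user_hz = sysconf(_SC_CLK_TCK);  /* typically 100 */

        printf("rto = %.2f s\n", (double)rto_ticks / user_hz);
        return 0;
    }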
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 019c8c16e5cc..8245247a6ceb 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -571,8 +571,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 	   does sequence test, SYN is truncated, and thus we consider
 	   it a bare ACK.
 
-	   Both ends (listening sockets) accept the new incoming
-	   connection and try to talk to each other. 8-)
+	   If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this
+	   bare ACK.  Otherwise, we create an established connection.  Both
+	   ends (listening sockets) accept the new incoming connection and try
+	   to talk to each other. 8-)
 
 	   Note: This case is both harmless, and rare. Possibility is about the
 	   same as us discovering intelligent life on another plant tomorrow.
@@ -640,6 +642,13 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 	if (!(flg & TCP_FLAG_ACK))
 		return NULL;
 
+	/* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
+	if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
+	    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
+		inet_rsk(req)->acked = 1;
+		return NULL;
+	}
+
 	/* OK, ACK is valid, create big socket and
 	 * feed this segment to it. It will repeat all
 	 * the tests. THIS SEGMENT MUST MOVE SOCKET TO
@@ -678,24 +687,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 	inet_csk_reqsk_queue_unlink(sk, req, prev);
 	inet_csk_reqsk_queue_removed(sk, req);
 
-	if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
-	    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
-
-		/* the accept queue handling is done is est recv slow
-		 * path so lets make sure to start there
-		 */
-		tcp_sk(child)->pred_flags = 0;
-		sock_hold(sk);
-		sock_hold(child);
-		tcp_sk(child)->defer_tcp_accept.listen_sk = sk;
-		tcp_sk(child)->defer_tcp_accept.request = req;
-
-		inet_csk_reset_keepalive_timer(child,
-			inet_csk(sk)->icsk_accept_queue.rskq_defer_accept * HZ);
-	} else {
-		inet_csk_reqsk_queue_add(sk, req, child);
-	}
-
+	inet_csk_reqsk_queue_add(sk, req, child);
 	return child;
 
 listen_overflow:
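
Note on the tcp_check_req() hunks above: with TCP_DEFER_ACCEPT the bare
handshake ACK is now dropped (marking the request acked) instead of creating a
child socket that lingers in ESTABLISHED, so accept() only surfaces
connections that already have data queued. Minimal listener sketch, error
handling elided:

    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <netinet/tcp.h>
    #include <sys/socket.h>
    #include <unistd.h>

    int main(void)
    {
        int fd = socket(AF_INET, SOCK_STREAM, 0), secs = 10;
        struct sockaddr_in a = { .sin_family = AF_INET,
                                 .sin_port = htons(8080),
                                 .sin_addr = { htonl(INADDR_ANY) } };

        setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &secs, sizeof(secs));
        bind(fd, (struct sockaddr *)&a, sizeof(a));
        listen(fd, 16);
        for (;;) {
            char buf[512];
            int c = accept(fd, NULL, NULL); /* returns once data arrived */

            read(c, buf, sizeof(buf));      /* payload already queued */
            close(c);
        }
    }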
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e399bde7813a..ad993ecb4810 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2131,6 +2131,8 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 	if (tcp_transmit_skb(sk, skb, 0, priority))
 		NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
+
+	TCP_INC_STATS(TCP_MIB_OUTRSTS);
 }
 
 /* WARNING: This routine must only be called when we have already sent
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 4de68cf5f2aa..63ed9d6830e7 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -489,11 +489,6 @@ static void tcp_keepalive_timer (unsigned long data)
 		goto death;
 	}
 
-	if (tp->defer_tcp_accept.request && sk->sk_state == TCP_ESTABLISHED) {
-		tcp_send_active_reset(sk, GFP_ATOMIC);
-		goto death;
-	}
-
 	if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
 		goto out;
 
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index d3b709a6f264..cb1f0e83830b 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -97,7 +97,7 @@ static int tunnel64_rcv(struct sk_buff *skb)
 {
 	struct xfrm_tunnel *handler;
 
-	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 		goto drop;
 
 	for (handler = tunnel64_handlers; handler; handler = handler->next)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index db1cb7c96d63..56fcda3694ba 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -420,7 +420,7 @@ void udp_err(struct sk_buff *skb, u32 info)
 /*
  *	Throw away all pending data and cancel the corking. Socket is locked.
  */
-static void udp_flush_pending_frames(struct sock *sk)
+void udp_flush_pending_frames(struct sock *sk)
 {
 	struct udp_sock *up = udp_sk(sk);
 
@@ -430,6 +430,7 @@ static void udp_flush_pending_frames(struct sock *sk)
 		ip_flush_pending_frames(sk);
 	}
 }
+EXPORT_SYMBOL(udp_flush_pending_frames);
 
 /**
  * 	udp4_hwcsum_outgoing  -  handle outgoing HW checksumming
435 * udp4_hwcsum_outgoing - handle outgoing HW checksumming 436 * udp4_hwcsum_outgoing - handle outgoing HW checksumming
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 584e6d74e3a9..7135279f3f84 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -52,7 +52,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 	IP_ECN_clear(top_iph);
 
 	top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
-		0 : XFRM_MODE_SKB_CB(skb)->frag_off;
+		0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));
 	ip_select_ident(top_iph, dst->child, NULL);
 
 	top_iph->ttl = dst_metric(dst->child, RTAX_HOPLIMIT);
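
Note on the hunk above: only the DF bit may be inherited from the inner
frag_off; copying the whole field would leak the inner fragment offset into
the outer header. The masking, shown standalone:

    #include <arpa/inet.h>
    #include <netinet/ip.h>   /* IP_DF */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint16_t inner = htons(IP_DF | 0x0123);  /* DF plus some offset bits */
        uint16_t outer = inner & htons(IP_DF);

        printf("outer frag_off = 0x%04x\n", ntohs(outer));  /* 0x4000 */
        return 0;
    }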