aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2013-04-24 14:33:46 -0400
committerThomas Gleixner <tglx@linutronix.de>2013-04-24 14:33:54 -0400
commit6402c7dc2a19c19bd8cdc7d80878b850da418942 (patch)
treecda2ea2df40442e2aa016119f3548cc504127ea8 /net/ipv4
parent77c675ba18836802f6b73d2d773481d06ebc0f04 (diff)
parent60d509fa6a9c4653a86ad830e4c4b30360b23f0e (diff)
Merge branch 'linus' into timers/core
Reason: Get upstream fixes before adding conflicting code. Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c3
-rw-r--r--net/ipv4/devinet.c66
-rw-r--r--net/ipv4/esp4.c6
-rw-r--r--net/ipv4/inet_fragment.c20
-rw-r--r--net/ipv4/ip_fragment.c25
-rw-r--r--net/ipv4/ip_gre.c5
-rw-r--r--net/ipv4/ip_options.c5
-rw-r--r--net/ipv4/ipconfig.c3
-rw-r--r--net/ipv4/netfilter/Kconfig13
-rw-r--r--net/ipv4/netfilter/ipt_rpfilter.c8
-rw-r--r--net/ipv4/syncookies.c4
-rw-r--r--net/ipv4/tcp.c2
-rw-r--r--net/ipv4/tcp_input.c71
-rw-r--r--net/ipv4/tcp_ipv4.c14
-rw-r--r--net/ipv4/tcp_output.c17
-rw-r--r--net/ipv4/udp.c7
16 files changed, 158 insertions, 111 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 68f6a94f7661..c929d9c1c4b6 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1333,8 +1333,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
1333 iph->frag_off |= htons(IP_MF); 1333 iph->frag_off |= htons(IP_MF);
1334 offset += (skb->len - skb->mac_len - iph->ihl * 4); 1334 offset += (skb->len - skb->mac_len - iph->ihl * 4);
1335 } else { 1335 } else {
1336 if (!(iph->frag_off & htons(IP_DF))) 1336 iph->id = htons(id++);
1337 iph->id = htons(id++);
1338 } 1337 }
1339 iph->tot_len = htons(skb->len - skb->mac_len); 1338 iph->tot_len = htons(skb->len - skb->mac_len);
1340 iph->check = 0; 1339 iph->check = 0;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index f678507bc829..c6287cd978c2 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -587,13 +587,16 @@ static void check_lifetime(struct work_struct *work)
587{ 587{
588 unsigned long now, next, next_sec, next_sched; 588 unsigned long now, next, next_sec, next_sched;
589 struct in_ifaddr *ifa; 589 struct in_ifaddr *ifa;
590 struct hlist_node *n;
590 int i; 591 int i;
591 592
592 now = jiffies; 593 now = jiffies;
593 next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); 594 next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
594 595
595 rcu_read_lock();
596 for (i = 0; i < IN4_ADDR_HSIZE; i++) { 596 for (i = 0; i < IN4_ADDR_HSIZE; i++) {
597 bool change_needed = false;
598
599 rcu_read_lock();
597 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) { 600 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
598 unsigned long age; 601 unsigned long age;
599 602
@@ -606,16 +609,7 @@ static void check_lifetime(struct work_struct *work)
606 609
607 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && 610 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
608 age >= ifa->ifa_valid_lft) { 611 age >= ifa->ifa_valid_lft) {
609 struct in_ifaddr **ifap ; 612 change_needed = true;
610
611 rtnl_lock();
612 for (ifap = &ifa->ifa_dev->ifa_list;
613 *ifap != NULL; ifap = &ifa->ifa_next) {
614 if (*ifap == ifa)
615 inet_del_ifa(ifa->ifa_dev,
616 ifap, 1);
617 }
618 rtnl_unlock();
619 } else if (ifa->ifa_preferred_lft == 613 } else if (ifa->ifa_preferred_lft ==
620 INFINITY_LIFE_TIME) { 614 INFINITY_LIFE_TIME) {
621 continue; 615 continue;
@@ -625,10 +619,8 @@ static void check_lifetime(struct work_struct *work)
625 next = ifa->ifa_tstamp + 619 next = ifa->ifa_tstamp +
626 ifa->ifa_valid_lft * HZ; 620 ifa->ifa_valid_lft * HZ;
627 621
628 if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) { 622 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
629 ifa->ifa_flags |= IFA_F_DEPRECATED; 623 change_needed = true;
630 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
631 }
632 } else if (time_before(ifa->ifa_tstamp + 624 } else if (time_before(ifa->ifa_tstamp +
633 ifa->ifa_preferred_lft * HZ, 625 ifa->ifa_preferred_lft * HZ,
634 next)) { 626 next)) {
@@ -636,8 +628,42 @@ static void check_lifetime(struct work_struct *work)
636 ifa->ifa_preferred_lft * HZ; 628 ifa->ifa_preferred_lft * HZ;
637 } 629 }
638 } 630 }
631 rcu_read_unlock();
632 if (!change_needed)
633 continue;
634 rtnl_lock();
635 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
636 unsigned long age;
637
638 if (ifa->ifa_flags & IFA_F_PERMANENT)
639 continue;
640
641 /* We try to batch several events at once. */
642 age = (now - ifa->ifa_tstamp +
643 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
644
645 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
646 age >= ifa->ifa_valid_lft) {
647 struct in_ifaddr **ifap;
648
649 for (ifap = &ifa->ifa_dev->ifa_list;
650 *ifap != NULL; ifap = &(*ifap)->ifa_next) {
651 if (*ifap == ifa) {
652 inet_del_ifa(ifa->ifa_dev,
653 ifap, 1);
654 break;
655 }
656 }
657 } else if (ifa->ifa_preferred_lft !=
658 INFINITY_LIFE_TIME &&
659 age >= ifa->ifa_preferred_lft &&
660 !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
661 ifa->ifa_flags |= IFA_F_DEPRECATED;
662 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
663 }
664 }
665 rtnl_unlock();
639 } 666 }
640 rcu_read_unlock();
641 667
642 next_sec = round_jiffies_up(next); 668 next_sec = round_jiffies_up(next);
643 next_sched = next; 669 next_sched = next;
@@ -802,8 +828,12 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
802 if (nlh->nlmsg_flags & NLM_F_EXCL || 828 if (nlh->nlmsg_flags & NLM_F_EXCL ||
803 !(nlh->nlmsg_flags & NLM_F_REPLACE)) 829 !(nlh->nlmsg_flags & NLM_F_REPLACE))
804 return -EEXIST; 830 return -EEXIST;
805 831 ifa = ifa_existing;
806 set_ifa_lifetime(ifa_existing, valid_lft, prefered_lft); 832 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
833 cancel_delayed_work(&check_lifetime_work);
834 schedule_delayed_work(&check_lifetime_work, 0);
835 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
836 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
807 } 837 }
808 return 0; 838 return 0;
809} 839}
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 3b4f0cd2e63e..4cfe34d4cc96 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -139,8 +139,6 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
139 139
140 /* skb is pure payload to encrypt */ 140 /* skb is pure payload to encrypt */
141 141
142 err = -ENOMEM;
143
144 esp = x->data; 142 esp = x->data;
145 aead = esp->aead; 143 aead = esp->aead;
146 alen = crypto_aead_authsize(aead); 144 alen = crypto_aead_authsize(aead);
@@ -176,8 +174,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
176 } 174 }
177 175
178 tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); 176 tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
179 if (!tmp) 177 if (!tmp) {
178 err = -ENOMEM;
180 goto error; 179 goto error;
180 }
181 181
182 seqhi = esp_tmp_seqhi(tmp); 182 seqhi = esp_tmp_seqhi(tmp);
183 iv = esp_tmp_iv(aead, tmp, seqhilen); 183 iv = esp_tmp_iv(aead, tmp, seqhilen);
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 245ae078a07f..f4fd23de9b13 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -21,6 +21,7 @@
21#include <linux/rtnetlink.h> 21#include <linux/rtnetlink.h>
22#include <linux/slab.h> 22#include <linux/slab.h>
23 23
24#include <net/sock.h>
24#include <net/inet_frag.h> 25#include <net/inet_frag.h>
25 26
26static void inet_frag_secret_rebuild(unsigned long dummy) 27static void inet_frag_secret_rebuild(unsigned long dummy)
@@ -277,6 +278,7 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
277 __releases(&f->lock) 278 __releases(&f->lock)
278{ 279{
279 struct inet_frag_queue *q; 280 struct inet_frag_queue *q;
281 int depth = 0;
280 282
281 hlist_for_each_entry(q, &f->hash[hash], list) { 283 hlist_for_each_entry(q, &f->hash[hash], list) {
282 if (q->net == nf && f->match(q, key)) { 284 if (q->net == nf && f->match(q, key)) {
@@ -284,9 +286,25 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
284 read_unlock(&f->lock); 286 read_unlock(&f->lock);
285 return q; 287 return q;
286 } 288 }
289 depth++;
287 } 290 }
288 read_unlock(&f->lock); 291 read_unlock(&f->lock);
289 292
290 return inet_frag_create(nf, f, key); 293 if (depth <= INETFRAGS_MAXDEPTH)
294 return inet_frag_create(nf, f, key);
295 else
296 return ERR_PTR(-ENOBUFS);
291} 297}
292EXPORT_SYMBOL(inet_frag_find); 298EXPORT_SYMBOL(inet_frag_find);
299
300void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
301 const char *prefix)
302{
303 static const char msg[] = "inet_frag_find: Fragment hash bucket"
304 " list length grew over limit " __stringify(INETFRAGS_MAXDEPTH)
305 ". Dropping fragment.\n";
306
307 if (PTR_ERR(q) == -ENOBUFS)
308 LIMIT_NETDEBUG(KERN_WARNING "%s%s", prefix, msg);
309}
310EXPORT_SYMBOL(inet_frag_maybe_warn_overflow);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b6d30acb600c..52c273ea05c3 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -248,8 +248,7 @@ static void ip_expire(unsigned long arg)
248 if (!head->dev) 248 if (!head->dev)
249 goto out_rcu_unlock; 249 goto out_rcu_unlock;
250 250
251 /* skb dst is stale, drop it, and perform route lookup again */ 251 /* skb has no dst, perform route lookup again */
252 skb_dst_drop(head);
253 iph = ip_hdr(head); 252 iph = ip_hdr(head);
254 err = ip_route_input_noref(head, iph->daddr, iph->saddr, 253 err = ip_route_input_noref(head, iph->daddr, iph->saddr,
255 iph->tos, head->dev); 254 iph->tos, head->dev);
@@ -292,14 +291,11 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
292 hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); 291 hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
293 292
294 q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); 293 q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
295 if (q == NULL) 294 if (IS_ERR_OR_NULL(q)) {
296 goto out_nomem; 295 inet_frag_maybe_warn_overflow(q, pr_fmt());
297 296 return NULL;
297 }
298 return container_of(q, struct ipq, q); 298 return container_of(q, struct ipq, q);
299
300out_nomem:
301 LIMIT_NETDEBUG(KERN_ERR pr_fmt("ip_frag_create: no memory left !\n"));
302 return NULL;
303} 299}
304 300
305/* Is the fragment too far ahead to be part of ipq? */ 301/* Is the fragment too far ahead to be part of ipq? */
@@ -526,9 +522,16 @@ found:
526 qp->q.max_size = skb->len + ihl; 522 qp->q.max_size = skb->len + ihl;
527 523
528 if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && 524 if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
529 qp->q.meat == qp->q.len) 525 qp->q.meat == qp->q.len) {
530 return ip_frag_reasm(qp, prev, dev); 526 unsigned long orefdst = skb->_skb_refdst;
527
528 skb->_skb_refdst = 0UL;
529 err = ip_frag_reasm(qp, prev, dev);
530 skb->_skb_refdst = orefdst;
531 return err;
532 }
531 533
534 skb_dst_drop(skb);
532 inet_frag_lru_move(&qp->q); 535 inet_frag_lru_move(&qp->q);
533 return -EINPROGRESS; 536 return -EINPROGRESS;
534 537
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index d0ef0e674ec5..91d66dbde9c0 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -798,10 +798,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
798 798
799 if (dev->header_ops && dev->type == ARPHRD_IPGRE) { 799 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
800 gre_hlen = 0; 800 gre_hlen = 0;
801 if (skb->protocol == htons(ETH_P_IP)) 801 tiph = (const struct iphdr *)skb->data;
802 tiph = (const struct iphdr *)skb->data;
803 else
804 tiph = &tunnel->parms.iph;
805 } else { 802 } else {
806 gre_hlen = tunnel->hlen; 803 gre_hlen = tunnel->hlen;
807 tiph = &tunnel->parms.iph; 804 tiph = &tunnel->parms.iph;
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 310a3647c83d..ec7264514a82 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -370,7 +370,6 @@ int ip_options_compile(struct net *net,
370 } 370 }
371 switch (optptr[3]&0xF) { 371 switch (optptr[3]&0xF) {
372 case IPOPT_TS_TSONLY: 372 case IPOPT_TS_TSONLY:
373 opt->ts = optptr - iph;
374 if (skb) 373 if (skb)
375 timeptr = &optptr[optptr[2]-1]; 374 timeptr = &optptr[optptr[2]-1];
376 opt->ts_needtime = 1; 375 opt->ts_needtime = 1;
@@ -381,7 +380,6 @@ int ip_options_compile(struct net *net,
381 pp_ptr = optptr + 2; 380 pp_ptr = optptr + 2;
382 goto error; 381 goto error;
383 } 382 }
384 opt->ts = optptr - iph;
385 if (rt) { 383 if (rt) {
386 spec_dst_fill(&spec_dst, skb); 384 spec_dst_fill(&spec_dst, skb);
387 memcpy(&optptr[optptr[2]-1], &spec_dst, 4); 385 memcpy(&optptr[optptr[2]-1], &spec_dst, 4);
@@ -396,7 +394,6 @@ int ip_options_compile(struct net *net,
396 pp_ptr = optptr + 2; 394 pp_ptr = optptr + 2;
397 goto error; 395 goto error;
398 } 396 }
399 opt->ts = optptr - iph;
400 { 397 {
401 __be32 addr; 398 __be32 addr;
402 memcpy(&addr, &optptr[optptr[2]-1], 4); 399 memcpy(&addr, &optptr[optptr[2]-1], 4);
@@ -429,12 +426,12 @@ int ip_options_compile(struct net *net,
429 pp_ptr = optptr + 3; 426 pp_ptr = optptr + 3;
430 goto error; 427 goto error;
431 } 428 }
432 opt->ts = optptr - iph;
433 if (skb) { 429 if (skb) {
434 optptr[3] = (optptr[3]&0xF)|((overflow+1)<<4); 430 optptr[3] = (optptr[3]&0xF)|((overflow+1)<<4);
435 opt->is_changed = 1; 431 opt->is_changed = 1;
436 } 432 }
437 } 433 }
434 opt->ts = optptr - iph;
438 break; 435 break;
439 case IPOPT_RA: 436 case IPOPT_RA:
440 if (optlen < 4) { 437 if (optlen < 4) {
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 98cbc6877019..bf6c5cf31aed 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1522,7 +1522,8 @@ static int __init ip_auto_config(void)
1522 } 1522 }
1523 for (i++; i < CONF_NAMESERVERS_MAX; i++) 1523 for (i++; i < CONF_NAMESERVERS_MAX; i++)
1524 if (ic_nameservers[i] != NONE) 1524 if (ic_nameservers[i] != NONE)
1525 pr_cont(", nameserver%u=%pI4\n", i, &ic_nameservers[i]); 1525 pr_cont(", nameserver%u=%pI4", i, &ic_nameservers[i]);
1526 pr_cont("\n");
1526#endif /* !SILENT */ 1527#endif /* !SILENT */
1527 1528
1528 return 0; 1529 return 0;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index ce2d43e1f09f..0d755c50994b 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -36,19 +36,6 @@ config NF_CONNTRACK_PROC_COMPAT
36 36
37 If unsure, say Y. 37 If unsure, say Y.
38 38
39config IP_NF_QUEUE
40 tristate "IP Userspace queueing via NETLINK (OBSOLETE)"
41 depends on NETFILTER_ADVANCED
42 help
43 Netfilter has the ability to queue packets to user space: the
44 netlink device can be used to access them using this driver.
45
46 This option enables the old IPv4-only "ip_queue" implementation
47 which has been obsoleted by the new "nfnetlink_queue" code (see
48 CONFIG_NETFILTER_NETLINK_QUEUE).
49
50 To compile it as a module, choose M here. If unsure, say N.
51
52config IP_NF_IPTABLES 39config IP_NF_IPTABLES
53 tristate "IP tables support (required for filtering/masq/NAT)" 40 tristate "IP tables support (required for filtering/masq/NAT)"
54 default m if NETFILTER_ADVANCED=n 41 default m if NETFILTER_ADVANCED=n
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index c30130062cd6..c49dcd0284a0 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -66,6 +66,12 @@ static bool rpfilter_lookup_reverse(struct flowi4 *fl4,
66 return dev_match; 66 return dev_match;
67} 67}
68 68
69static bool rpfilter_is_local(const struct sk_buff *skb)
70{
71 const struct rtable *rt = skb_rtable(skb);
72 return rt && (rt->rt_flags & RTCF_LOCAL);
73}
74
69static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) 75static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
70{ 76{
71 const struct xt_rpfilter_info *info; 77 const struct xt_rpfilter_info *info;
@@ -76,7 +82,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
76 info = par->matchinfo; 82 info = par->matchinfo;
77 invert = info->flags & XT_RPFILTER_INVERT; 83 invert = info->flags & XT_RPFILTER_INVERT;
78 84
79 if (par->in->flags & IFF_LOOPBACK) 85 if (rpfilter_is_local(skb))
80 return true ^ invert; 86 return true ^ invert;
81 87
82 iph = ip_hdr(skb); 88 iph = ip_hdr(skb);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index ef54377fb11c..397e0f69435f 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -349,8 +349,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
349 * hasn't changed since we received the original syn, but I see 349 * hasn't changed since we received the original syn, but I see
350 * no easy way to do this. 350 * no easy way to do this.
351 */ 351 */
352 flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk), 352 flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark,
353 RT_SCOPE_UNIVERSE, IPPROTO_TCP, 353 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP,
354 inet_sk_flowi_flags(sk), 354 inet_sk_flowi_flags(sk),
355 (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, 355 (opt && opt->srr) ? opt->faddr : ireq->rmt_addr,
356 ireq->loc_addr, th->source, th->dest); 356 ireq->loc_addr, th->source, th->dest);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 47e854fcae24..e22020790709 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -775,7 +775,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
775 * Make sure that we have exactly size bytes 775 * Make sure that we have exactly size bytes
776 * available to the caller, no more, no less. 776 * available to the caller, no more, no less.
777 */ 777 */
778 skb->avail_size = size; 778 skb->reserved_tailroom = skb->end - skb->tail - size;
779 return skb; 779 return skb;
780 } 780 }
781 __kfree_skb(skb); 781 __kfree_skb(skb);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0d9bdacce99f..13b9c08fc158 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -113,6 +113,7 @@ int sysctl_tcp_early_retrans __read_mostly = 2;
113#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ 113#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */
114#define FLAG_NONHEAD_RETRANS_ACKED 0x1000 /* Non-head rexmitted data was ACKed */ 114#define FLAG_NONHEAD_RETRANS_ACKED 0x1000 /* Non-head rexmitted data was ACKed */
115#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ 115#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */
116#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */
116 117
117#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) 118#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
118#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) 119#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
@@ -2059,11 +2060,8 @@ void tcp_enter_loss(struct sock *sk, int how)
2059 if (tcp_is_reno(tp)) 2060 if (tcp_is_reno(tp))
2060 tcp_reset_reno_sack(tp); 2061 tcp_reset_reno_sack(tp);
2061 2062
2062 if (!how) { 2063 tp->undo_marker = tp->snd_una;
2063 /* Push undo marker, if it was plain RTO and nothing 2064 if (how) {
2064 * was retransmitted. */
2065 tp->undo_marker = tp->snd_una;
2066 } else {
2067 tp->sacked_out = 0; 2065 tp->sacked_out = 0;
2068 tp->fackets_out = 0; 2066 tp->fackets_out = 0;
2069 } 2067 }
@@ -3567,6 +3565,27 @@ static void tcp_send_challenge_ack(struct sock *sk)
3567 } 3565 }
3568} 3566}
3569 3567
3568static void tcp_store_ts_recent(struct tcp_sock *tp)
3569{
3570 tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
3571 tp->rx_opt.ts_recent_stamp = get_seconds();
3572}
3573
3574static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
3575{
3576 if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
3577 /* PAWS bug workaround wrt. ACK frames, the PAWS discard
3578 * extra check below makes sure this can only happen
3579 * for pure ACK frames. -DaveM
3580 *
3581 * Not only, also it occurs for expired timestamps.
3582 */
3583
3584 if (tcp_paws_check(&tp->rx_opt, 0))
3585 tcp_store_ts_recent(tp);
3586 }
3587}
3588
3570/* This routine deals with incoming acks, but not outgoing ones. */ 3589/* This routine deals with incoming acks, but not outgoing ones. */
3571static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) 3590static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3572{ 3591{
@@ -3610,6 +3629,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3610 prior_fackets = tp->fackets_out; 3629 prior_fackets = tp->fackets_out;
3611 prior_in_flight = tcp_packets_in_flight(tp); 3630 prior_in_flight = tcp_packets_in_flight(tp);
3612 3631
3632 /* ts_recent update must be made after we are sure that the packet
3633 * is in window.
3634 */
3635 if (flag & FLAG_UPDATE_TS_RECENT)
3636 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
3637
3613 if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) { 3638 if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
3614 /* Window is constant, pure forward advance. 3639 /* Window is constant, pure forward advance.
3615 * No more checks are required. 3640 * No more checks are required.
@@ -3930,27 +3955,6 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
3930EXPORT_SYMBOL(tcp_parse_md5sig_option); 3955EXPORT_SYMBOL(tcp_parse_md5sig_option);
3931#endif 3956#endif
3932 3957
3933static inline void tcp_store_ts_recent(struct tcp_sock *tp)
3934{
3935 tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
3936 tp->rx_opt.ts_recent_stamp = get_seconds();
3937}
3938
3939static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
3940{
3941 if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
3942 /* PAWS bug workaround wrt. ACK frames, the PAWS discard
3943 * extra check below makes sure this can only happen
3944 * for pure ACK frames. -DaveM
3945 *
3946 * Not only, also it occurs for expired timestamps.
3947 */
3948
3949 if (tcp_paws_check(&tp->rx_opt, 0))
3950 tcp_store_ts_recent(tp);
3951 }
3952}
3953
3954/* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM 3958/* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM
3955 * 3959 *
3956 * It is not fatal. If this ACK does _not_ change critical state (seqs, window) 3960 * It is not fatal. If this ACK does _not_ change critical state (seqs, window)
@@ -5546,14 +5550,9 @@ slow_path:
5546 return 0; 5550 return 0;
5547 5551
5548step5: 5552step5:
5549 if (tcp_ack(sk, skb, FLAG_SLOWPATH) < 0) 5553 if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
5550 goto discard; 5554 goto discard;
5551 5555
5552 /* ts_recent update must be made after we are sure that the packet
5553 * is in window.
5554 */
5555 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
5556
5557 tcp_rcv_rtt_measure_ts(sk, skb); 5556 tcp_rcv_rtt_measure_ts(sk, skb);
5558 5557
5559 /* Process urgent data. */ 5558 /* Process urgent data. */
@@ -5989,7 +5988,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5989 5988
5990 /* step 5: check the ACK field */ 5989 /* step 5: check the ACK field */
5991 if (true) { 5990 if (true) {
5992 int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0; 5991 int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
5992 FLAG_UPDATE_TS_RECENT) > 0;
5993 5993
5994 switch (sk->sk_state) { 5994 switch (sk->sk_state) {
5995 case TCP_SYN_RECV: 5995 case TCP_SYN_RECV:
@@ -6140,11 +6140,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
6140 } 6140 }
6141 } 6141 }
6142 6142
6143 /* ts_recent update must be made after we are sure that the packet
6144 * is in window.
6145 */
6146 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
6147
6148 /* step 6: check the URG bit */ 6143 /* step 6: check the URG bit */
6149 tcp_urg(sk, skb, th); 6144 tcp_urg(sk, skb, th);
6150 6145
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4a8ec457310f..d09203c63264 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -274,13 +274,6 @@ static void tcp_v4_mtu_reduced(struct sock *sk)
274 struct inet_sock *inet = inet_sk(sk); 274 struct inet_sock *inet = inet_sk(sk);
275 u32 mtu = tcp_sk(sk)->mtu_info; 275 u32 mtu = tcp_sk(sk)->mtu_info;
276 276
277 /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
278 * send out by Linux are always <576bytes so they should go through
279 * unfragmented).
280 */
281 if (sk->sk_state == TCP_LISTEN)
282 return;
283
284 dst = inet_csk_update_pmtu(sk, mtu); 277 dst = inet_csk_update_pmtu(sk, mtu);
285 if (!dst) 278 if (!dst)
286 return; 279 return;
@@ -408,6 +401,13 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
408 goto out; 401 goto out;
409 402
410 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ 403 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
404 /* We are not interested in TCP_LISTEN and open_requests
405 * (SYN-ACKs send out by Linux are always <576bytes so
406 * they should go through unfragmented).
407 */
408 if (sk->sk_state == TCP_LISTEN)
409 goto out;
410
411 tp->mtu_info = info; 411 tp->mtu_info = info;
412 if (!sock_owned_by_user(sk)) { 412 if (!sock_owned_by_user(sk)) {
413 tcp_v4_mtu_reduced(sk); 413 tcp_v4_mtu_reduced(sk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e2b4461074da..509912a5ff98 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1298,7 +1298,6 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
1298 eat = min_t(int, len, skb_headlen(skb)); 1298 eat = min_t(int, len, skb_headlen(skb));
1299 if (eat) { 1299 if (eat) {
1300 __skb_pull(skb, eat); 1300 __skb_pull(skb, eat);
1301 skb->avail_size -= eat;
1302 len -= eat; 1301 len -= eat;
1303 if (!len) 1302 if (!len)
1304 return; 1303 return;
@@ -1810,8 +1809,11 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1810 goto send_now; 1809 goto send_now;
1811 } 1810 }
1812 1811
1813 /* Ok, it looks like it is advisable to defer. */ 1812 /* Ok, it looks like it is advisable to defer.
1814 tp->tso_deferred = 1 | (jiffies << 1); 1813 * Do not rearm the timer if already set to not break TCP ACK clocking.
1814 */
1815 if (!tp->tso_deferred)
1816 tp->tso_deferred = 1 | (jiffies << 1);
1815 1817
1816 return true; 1818 return true;
1817 1819
@@ -2386,8 +2388,12 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2386 */ 2388 */
2387 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2389 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2388 2390
2389 /* make sure skb->data is aligned on arches that require it */ 2391 /* make sure skb->data is aligned on arches that require it
2390 if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) { 2392 * and check if ack-trimming & collapsing extended the headroom
2393 * beyond what csum_start can cover.
2394 */
2395 if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) ||
2396 skb_headroom(skb) >= 0xFFFF)) {
2391 struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER, 2397 struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
2392 GFP_ATOMIC); 2398 GFP_ATOMIC);
2393 return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : 2399 return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
@@ -2707,6 +2713,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2707 skb_reserve(skb, MAX_TCP_HEADER); 2713 skb_reserve(skb, MAX_TCP_HEADER);
2708 2714
2709 skb_dst_set(skb, dst); 2715 skb_dst_set(skb, dst);
2716 security_skb_owned_by(skb, sk);
2710 2717
2711 mss = dst_metric_advmss(dst); 2718 mss = dst_metric_advmss(dst);
2712 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) 2719 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 265c42cf963c..0a073a263720 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1762,9 +1762,16 @@ int udp_rcv(struct sk_buff *skb)
1762 1762
1763void udp_destroy_sock(struct sock *sk) 1763void udp_destroy_sock(struct sock *sk)
1764{ 1764{
1765 struct udp_sock *up = udp_sk(sk);
1765 bool slow = lock_sock_fast(sk); 1766 bool slow = lock_sock_fast(sk);
1766 udp_flush_pending_frames(sk); 1767 udp_flush_pending_frames(sk);
1767 unlock_sock_fast(sk, slow); 1768 unlock_sock_fast(sk, slow);
1769 if (static_key_false(&udp_encap_needed) && up->encap_type) {
1770 void (*encap_destroy)(struct sock *sk);
1771 encap_destroy = ACCESS_ONCE(up->encap_destroy);
1772 if (encap_destroy)
1773 encap_destroy(sk);
1774 }
1768} 1775}
1769 1776
1770/* 1777/*