about | summary | refs | log | tree | commit | diff | stats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r-- net/ipv4/icmp.c 3
-rw-r--r-- net/ipv4/inet_diag.c 5
-rw-r--r-- net/ipv4/ip_sockglue.c 35
-rw-r--r-- net/ipv4/ip_vti.c 5
-rw-r--r-- net/ipv4/ipmr.c 4
-rw-r--r-- net/ipv4/netfilter/iptable_nat.c 4
-rw-r--r-- net/ipv4/route.c 18
-rw-r--r-- net/ipv4/tcp.c 29
-rw-r--r-- net/ipv4/tcp_illinois.c 8
-rw-r--r-- net/ipv4/tcp_input.c 37
-rw-r--r-- net/ipv4/tcp_ipv4.c 1
-rw-r--r-- net/ipv4/tcp_metrics.c 14
-rw-r--r-- net/ipv4/tcp_minisocks.c 1
-rw-r--r-- net/ipv4/tcp_output.c 19
-rw-r--r-- net/ipv4/tcp_timer.c 4
-rw-r--r-- net/ipv4/xfrm4_policy.c 13
16 files changed, 128 insertions, 72 deletions
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index f2eccd531746..17ff9fd7cdda 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -257,7 +257,8 @@ static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
257 struct inet_peer *peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, 1); 257 struct inet_peer *peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, 1);
258 rc = inet_peer_xrlim_allow(peer, 258 rc = inet_peer_xrlim_allow(peer,
259 net->ipv4.sysctl_icmp_ratelimit); 259 net->ipv4.sysctl_icmp_ratelimit);
260 inet_putpeer(peer); 260 if (peer)
261 inet_putpeer(peer);
261 } 262 }
262out: 263out:
263 return rc; 264 return rc;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 535584c00f91..0c34bfabc11f 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -892,13 +892,16 @@ static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
892 struct inet_diag_req_v2 *r, struct nlattr *bc) 892 struct inet_diag_req_v2 *r, struct nlattr *bc)
893{ 893{
894 const struct inet_diag_handler *handler; 894 const struct inet_diag_handler *handler;
895 int err = 0;
895 896
896 handler = inet_diag_lock_handler(r->sdiag_protocol); 897 handler = inet_diag_lock_handler(r->sdiag_protocol);
897 if (!IS_ERR(handler)) 898 if (!IS_ERR(handler))
898 handler->dump(skb, cb, r, bc); 899 handler->dump(skb, cb, r, bc);
900 else
901 err = PTR_ERR(handler);
899 inet_diag_unlock_handler(handler); 902 inet_diag_unlock_handler(handler);
900 903
901 return skb->len; 904 return err ? : skb->len;
902} 905}
903 906
904static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) 907static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 5eea4a811042..14bbfcf717ac 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -457,19 +457,28 @@ static int do_ip_setsockopt(struct sock *sk, int level,
457 struct inet_sock *inet = inet_sk(sk); 457 struct inet_sock *inet = inet_sk(sk);
458 int val = 0, err; 458 int val = 0, err;
459 459
460 if (((1<<optname) & ((1<<IP_PKTINFO) | (1<<IP_RECVTTL) | 460 switch (optname) {
461 (1<<IP_RECVOPTS) | (1<<IP_RECVTOS) | 461 case IP_PKTINFO:
462 (1<<IP_RETOPTS) | (1<<IP_TOS) | 462 case IP_RECVTTL:
463 (1<<IP_TTL) | (1<<IP_HDRINCL) | 463 case IP_RECVOPTS:
464 (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) | 464 case IP_RECVTOS:
465 (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) | 465 case IP_RETOPTS:
466 (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) | 466 case IP_TOS:
467 (1<<IP_MINTTL) | (1<<IP_NODEFRAG))) || 467 case IP_TTL:
468 optname == IP_UNICAST_IF || 468 case IP_HDRINCL:
469 optname == IP_MULTICAST_TTL || 469 case IP_MTU_DISCOVER:
470 optname == IP_MULTICAST_ALL || 470 case IP_RECVERR:
471 optname == IP_MULTICAST_LOOP || 471 case IP_ROUTER_ALERT:
472 optname == IP_RECVORIGDSTADDR) { 472 case IP_FREEBIND:
473 case IP_PASSSEC:
474 case IP_TRANSPARENT:
475 case IP_MINTTL:
476 case IP_NODEFRAG:
477 case IP_UNICAST_IF:
478 case IP_MULTICAST_TTL:
479 case IP_MULTICAST_ALL:
480 case IP_MULTICAST_LOOP:
481 case IP_RECVORIGDSTADDR:
473 if (optlen >= sizeof(int)) { 482 if (optlen >= sizeof(int)) {
474 if (get_user(val, (int __user *) optval)) 483 if (get_user(val, (int __user *) optval))
475 return -EFAULT; 484 return -EFAULT;
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 1831092f999f..858fddf6482a 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -338,12 +338,17 @@ static int vti_rcv(struct sk_buff *skb)
338 if (tunnel != NULL) { 338 if (tunnel != NULL) {
339 struct pcpu_tstats *tstats; 339 struct pcpu_tstats *tstats;
340 340
341 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
342 return -1;
343
341 tstats = this_cpu_ptr(tunnel->dev->tstats); 344 tstats = this_cpu_ptr(tunnel->dev->tstats);
342 u64_stats_update_begin(&tstats->syncp); 345 u64_stats_update_begin(&tstats->syncp);
343 tstats->rx_packets++; 346 tstats->rx_packets++;
344 tstats->rx_bytes += skb->len; 347 tstats->rx_bytes += skb->len;
345 u64_stats_update_end(&tstats->syncp); 348 u64_stats_update_end(&tstats->syncp);
346 349
350 skb->mark = 0;
351 secpath_reset(skb);
347 skb->dev = tunnel->dev; 352 skb->dev = tunnel->dev;
348 return 1; 353 return 1;
349 } 354 }
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 6168c4dc58b1..3eab2b2ffd34 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1318,6 +1318,10 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1318 if (get_user(v, (u32 __user *)optval)) 1318 if (get_user(v, (u32 __user *)optval))
1319 return -EFAULT; 1319 return -EFAULT;
1320 1320
1321 /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */
1322 if (v != RT_TABLE_DEFAULT && v >= 1000000000)
1323 return -EINVAL;
1324
1321 rtnl_lock(); 1325 rtnl_lock();
1322 ret = 0; 1326 ret = 0;
1323 if (sk == rtnl_dereference(mrt->mroute_sk)) { 1327 if (sk == rtnl_dereference(mrt->mroute_sk)) {
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 9e0ffaf1d942..a82047282dbb 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -184,7 +184,8 @@ nf_nat_ipv4_out(unsigned int hooknum,
184 184
185 if ((ct->tuplehash[dir].tuple.src.u3.ip != 185 if ((ct->tuplehash[dir].tuple.src.u3.ip !=
186 ct->tuplehash[!dir].tuple.dst.u3.ip) || 186 ct->tuplehash[!dir].tuple.dst.u3.ip) ||
187 (ct->tuplehash[dir].tuple.src.u.all != 187 (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
188 ct->tuplehash[dir].tuple.src.u.all !=
188 ct->tuplehash[!dir].tuple.dst.u.all)) 189 ct->tuplehash[!dir].tuple.dst.u.all))
189 if (nf_xfrm_me_harder(skb, AF_INET) < 0) 190 if (nf_xfrm_me_harder(skb, AF_INET) < 0)
190 ret = NF_DROP; 191 ret = NF_DROP;
@@ -221,6 +222,7 @@ nf_nat_ipv4_local_fn(unsigned int hooknum,
221 } 222 }
222#ifdef CONFIG_XFRM 223#ifdef CONFIG_XFRM
223 else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && 224 else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
225 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
224 ct->tuplehash[dir].tuple.dst.u.all != 226 ct->tuplehash[dir].tuple.dst.u.all !=
225 ct->tuplehash[!dir].tuple.src.u.all) 227 ct->tuplehash[!dir].tuple.src.u.all)
226 if (nf_xfrm_me_harder(skb, AF_INET) < 0) 228 if (nf_xfrm_me_harder(skb, AF_INET) < 0)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 432f4bb77238..df251424d816 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1163,8 +1163,12 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
1163 spin_lock_bh(&fnhe_lock); 1163 spin_lock_bh(&fnhe_lock);
1164 1164
1165 if (daddr == fnhe->fnhe_daddr) { 1165 if (daddr == fnhe->fnhe_daddr) {
1166 struct rtable *orig; 1166 struct rtable *orig = rcu_dereference(fnhe->fnhe_rth);
1167 1167 if (orig && rt_is_expired(orig)) {
1168 fnhe->fnhe_gw = 0;
1169 fnhe->fnhe_pmtu = 0;
1170 fnhe->fnhe_expires = 0;
1171 }
1168 if (fnhe->fnhe_pmtu) { 1172 if (fnhe->fnhe_pmtu) {
1169 unsigned long expires = fnhe->fnhe_expires; 1173 unsigned long expires = fnhe->fnhe_expires;
1170 unsigned long diff = expires - jiffies; 1174 unsigned long diff = expires - jiffies;
@@ -1181,7 +1185,6 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
1181 } else if (!rt->rt_gateway) 1185 } else if (!rt->rt_gateway)
1182 rt->rt_gateway = daddr; 1186 rt->rt_gateway = daddr;
1183 1187
1184 orig = rcu_dereference(fnhe->fnhe_rth);
1185 rcu_assign_pointer(fnhe->fnhe_rth, rt); 1188 rcu_assign_pointer(fnhe->fnhe_rth, rt);
1186 if (orig) 1189 if (orig)
1187 rt_free(orig); 1190 rt_free(orig);
@@ -1782,6 +1785,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
1782 if (dev_out->flags & IFF_LOOPBACK) 1785 if (dev_out->flags & IFF_LOOPBACK)
1783 flags |= RTCF_LOCAL; 1786 flags |= RTCF_LOCAL;
1784 1787
1788 do_cache = true;
1785 if (type == RTN_BROADCAST) { 1789 if (type == RTN_BROADCAST) {
1786 flags |= RTCF_BROADCAST | RTCF_LOCAL; 1790 flags |= RTCF_BROADCAST | RTCF_LOCAL;
1787 fi = NULL; 1791 fi = NULL;
@@ -1790,6 +1794,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
1790 if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr, 1794 if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
1791 fl4->flowi4_proto)) 1795 fl4->flowi4_proto))
1792 flags &= ~RTCF_LOCAL; 1796 flags &= ~RTCF_LOCAL;
1797 else
1798 do_cache = false;
1793 /* If multicast route do not exist use 1799 /* If multicast route do not exist use
1794 * default one, but do not gateway in this case. 1800 * default one, but do not gateway in this case.
1795 * Yes, it is hack. 1801 * Yes, it is hack.
@@ -1799,8 +1805,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
1799 } 1805 }
1800 1806
1801 fnhe = NULL; 1807 fnhe = NULL;
1802 do_cache = fi != NULL; 1808 do_cache &= fi != NULL;
1803 if (fi) { 1809 if (do_cache) {
1804 struct rtable __rcu **prth; 1810 struct rtable __rcu **prth;
1805 struct fib_nh *nh = &FIB_RES_NH(*res); 1811 struct fib_nh *nh = &FIB_RES_NH(*res);
1806 1812
@@ -2594,7 +2600,7 @@ int __init ip_rt_init(void)
2594 pr_err("Unable to create route proc files\n"); 2600 pr_err("Unable to create route proc files\n");
2595#ifdef CONFIG_XFRM 2601#ifdef CONFIG_XFRM
2596 xfrm_init(); 2602 xfrm_init();
2597 xfrm4_init(ip_rt_max_size); 2603 xfrm4_init();
2598#endif 2604#endif
2599 rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL); 2605 rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);
2600 2606
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f32c02e2a543..e457c7ab2e28 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -549,14 +549,12 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
549 !tp->urg_data || 549 !tp->urg_data ||
550 before(tp->urg_seq, tp->copied_seq) || 550 before(tp->urg_seq, tp->copied_seq) ||
551 !before(tp->urg_seq, tp->rcv_nxt)) { 551 !before(tp->urg_seq, tp->rcv_nxt)) {
552 struct sk_buff *skb;
553 552
554 answ = tp->rcv_nxt - tp->copied_seq; 553 answ = tp->rcv_nxt - tp->copied_seq;
555 554
556 /* Subtract 1, if FIN is in queue. */ 555 /* Subtract 1, if FIN was received */
557 skb = skb_peek_tail(&sk->sk_receive_queue); 556 if (answ && sock_flag(sk, SOCK_DONE))
558 if (answ && skb) 557 answ--;
559 answ -= tcp_hdr(skb)->fin;
560 } else 558 } else
561 answ = tp->urg_seq - tp->copied_seq; 559 answ = tp->urg_seq - tp->copied_seq;
562 release_sock(sk); 560 release_sock(sk);
@@ -832,8 +830,8 @@ static int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
832 return mss_now; 830 return mss_now;
833} 831}
834 832
835static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, 833static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
836 size_t psize, int flags) 834 size_t size, int flags)
837{ 835{
838 struct tcp_sock *tp = tcp_sk(sk); 836 struct tcp_sock *tp = tcp_sk(sk);
839 int mss_now, size_goal; 837 int mss_now, size_goal;
@@ -860,12 +858,9 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
860 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) 858 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
861 goto out_err; 859 goto out_err;
862 860
863 while (psize > 0) { 861 while (size > 0) {
864 struct sk_buff *skb = tcp_write_queue_tail(sk); 862 struct sk_buff *skb = tcp_write_queue_tail(sk);
865 struct page *page = pages[poffset / PAGE_SIZE];
866 int copy, i; 863 int copy, i;
867 int offset = poffset % PAGE_SIZE;
868 int size = min_t(size_t, psize, PAGE_SIZE - offset);
869 bool can_coalesce; 864 bool can_coalesce;
870 865
871 if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) { 866 if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) {
@@ -914,8 +909,8 @@ new_segment:
914 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; 909 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
915 910
916 copied += copy; 911 copied += copy;
917 poffset += copy; 912 offset += copy;
918 if (!(psize -= copy)) 913 if (!(size -= copy))
919 goto out; 914 goto out;
920 915
921 if (skb->len < size_goal || (flags & MSG_OOB)) 916 if (skb->len < size_goal || (flags & MSG_OOB))
@@ -962,7 +957,7 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset,
962 flags); 957 flags);
963 958
964 lock_sock(sk); 959 lock_sock(sk);
965 res = do_tcp_sendpages(sk, &page, offset, size, flags); 960 res = do_tcp_sendpages(sk, page, offset, size, flags);
966 release_sock(sk); 961 release_sock(sk);
967 return res; 962 return res;
968} 963}
@@ -1214,7 +1209,7 @@ new_segment:
1214wait_for_sndbuf: 1209wait_for_sndbuf:
1215 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 1210 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1216wait_for_memory: 1211wait_for_memory:
1217 if (copied && likely(!tp->repair)) 1212 if (copied)
1218 tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); 1213 tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
1219 1214
1220 if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) 1215 if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
@@ -1225,7 +1220,7 @@ wait_for_memory:
1225 } 1220 }
1226 1221
1227out: 1222out:
1228 if (copied && likely(!tp->repair)) 1223 if (copied)
1229 tcp_push(sk, flags, mss_now, tp->nonagle); 1224 tcp_push(sk, flags, mss_now, tp->nonagle);
1230 release_sock(sk); 1225 release_sock(sk);
1231 return copied + copied_syn; 1226 return copied + copied_syn;
@@ -2766,6 +2761,8 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info)
2766 info->tcpi_options |= TCPI_OPT_ECN; 2761 info->tcpi_options |= TCPI_OPT_ECN;
2767 if (tp->ecn_flags & TCP_ECN_SEEN) 2762 if (tp->ecn_flags & TCP_ECN_SEEN)
2768 info->tcpi_options |= TCPI_OPT_ECN_SEEN; 2763 info->tcpi_options |= TCPI_OPT_ECN_SEEN;
2764 if (tp->syn_data_acked)
2765 info->tcpi_options |= TCPI_OPT_SYN_DATA;
2769 2766
2770 info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto); 2767 info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto);
2771 info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato); 2768 info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato);
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 813b43a76fec..834857f3c871 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -313,11 +313,13 @@ static void tcp_illinois_info(struct sock *sk, u32 ext,
313 .tcpv_rttcnt = ca->cnt_rtt, 313 .tcpv_rttcnt = ca->cnt_rtt,
314 .tcpv_minrtt = ca->base_rtt, 314 .tcpv_minrtt = ca->base_rtt,
315 }; 315 };
316 u64 t = ca->sum_rtt;
317 316
318 do_div(t, ca->cnt_rtt); 317 if (info.tcpv_rttcnt > 0) {
319 info.tcpv_rtt = t; 318 u64 t = ca->sum_rtt;
320 319
320 do_div(t, info.tcpv_rttcnt);
321 info.tcpv_rtt = t;
322 }
321 nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); 323 nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
322 } 324 }
323} 325}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 432c36649db3..181fc8234a52 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4529,6 +4529,9 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
4529 struct tcphdr *th; 4529 struct tcphdr *th;
4530 bool fragstolen; 4530 bool fragstolen;
4531 4531
4532 if (size == 0)
4533 return 0;
4534
4532 skb = alloc_skb(size + sizeof(*th), sk->sk_allocation); 4535 skb = alloc_skb(size + sizeof(*th), sk->sk_allocation);
4533 if (!skb) 4536 if (!skb)
4534 goto err; 4537 goto err;
@@ -5310,11 +5313,6 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
5310 goto discard; 5313 goto discard;
5311 } 5314 }
5312 5315
5313 /* ts_recent update must be made after we are sure that the packet
5314 * is in window.
5315 */
5316 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
5317
5318 /* step 3: check security and precedence [ignored] */ 5316 /* step 3: check security and precedence [ignored] */
5319 5317
5320 /* step 4: Check for a SYN 5318 /* step 4: Check for a SYN
@@ -5549,6 +5547,11 @@ step5:
5549 if (th->ack && tcp_ack(sk, skb, FLAG_SLOWPATH) < 0) 5547 if (th->ack && tcp_ack(sk, skb, FLAG_SLOWPATH) < 0)
5550 goto discard; 5548 goto discard;
5551 5549
5550 /* ts_recent update must be made after we are sure that the packet
5551 * is in window.
5552 */
5553 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
5554
5552 tcp_rcv_rtt_measure_ts(sk, skb); 5555 tcp_rcv_rtt_measure_ts(sk, skb);
5553 5556
5554 /* Process urgent data. */ 5557 /* Process urgent data. */
@@ -5642,10 +5645,15 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
5642 tcp_fastopen_cache_set(sk, mss, cookie, syn_drop); 5645 tcp_fastopen_cache_set(sk, mss, cookie, syn_drop);
5643 5646
5644 if (data) { /* Retransmit unacked data in SYN */ 5647 if (data) { /* Retransmit unacked data in SYN */
5645 tcp_retransmit_skb(sk, data); 5648 tcp_for_write_queue_from(data, sk) {
5649 if (data == tcp_send_head(sk) ||
5650 __tcp_retransmit_skb(sk, data))
5651 break;
5652 }
5646 tcp_rearm_rto(sk); 5653 tcp_rearm_rto(sk);
5647 return true; 5654 return true;
5648 } 5655 }
5656 tp->syn_data_acked = tp->syn_data;
5649 return false; 5657 return false;
5650} 5658}
5651 5659
@@ -5963,7 +5971,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5963 5971
5964 req = tp->fastopen_rsk; 5972 req = tp->fastopen_rsk;
5965 if (req != NULL) { 5973 if (req != NULL) {
5966 BUG_ON(sk->sk_state != TCP_SYN_RECV && 5974 WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
5967 sk->sk_state != TCP_FIN_WAIT1); 5975 sk->sk_state != TCP_FIN_WAIT1);
5968 5976
5969 if (tcp_check_req(sk, skb, req, NULL, true) == NULL) 5977 if (tcp_check_req(sk, skb, req, NULL, true) == NULL)
@@ -6052,7 +6060,15 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
6052 * ACK we have received, this would have acknowledged 6060 * ACK we have received, this would have acknowledged
6053 * our SYNACK so stop the SYNACK timer. 6061 * our SYNACK so stop the SYNACK timer.
6054 */ 6062 */
6055 if (acceptable && req != NULL) { 6063 if (req != NULL) {
6064 /* Return RST if ack_seq is invalid.
6065 * Note that RFC793 only says to generate a
6066 * DUPACK for it but for TCP Fast Open it seems
6067 * better to treat this case like TCP_SYN_RECV
6068 * above.
6069 */
6070 if (!acceptable)
6071 return 1;
6056 /* We no longer need the request sock. */ 6072 /* We no longer need the request sock. */
6057 reqsk_fastopen_remove(sk, req, false); 6073 reqsk_fastopen_remove(sk, req, false);
6058 tcp_rearm_rto(sk); 6074 tcp_rearm_rto(sk);
@@ -6118,6 +6134,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
6118 } else 6134 } else
6119 goto discard; 6135 goto discard;
6120 6136
6137 /* ts_recent update must be made after we are sure that the packet
6138 * is in window.
6139 */
6140 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
6141
6121 /* step 6: check the URG bit */ 6142 /* step 6: check the URG bit */
6122 tcp_urg(sk, skb, th); 6143 tcp_urg(sk, skb, th);
6123 6144
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ef998b008a57..0c4a64355603 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1461,6 +1461,7 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk,
1461 skb_set_owner_r(skb, child); 1461 skb_set_owner_r(skb, child);
1462 __skb_queue_tail(&child->sk_receive_queue, skb); 1462 __skb_queue_tail(&child->sk_receive_queue, skb);
1463 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; 1463 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
1464 tp->syn_data_acked = 1;
1464 } 1465 }
1465 sk->sk_data_ready(sk, 0); 1466 sk->sk_data_ready(sk, 0);
1466 bh_unlock_sock(child); 1467 bh_unlock_sock(child);
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 4c752a6e0bcd..f696d7c2e9fa 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -1,7 +1,6 @@
1#include <linux/rcupdate.h> 1#include <linux/rcupdate.h>
2#include <linux/spinlock.h> 2#include <linux/spinlock.h>
3#include <linux/jiffies.h> 3#include <linux/jiffies.h>
4#include <linux/bootmem.h>
5#include <linux/module.h> 4#include <linux/module.h>
6#include <linux/cache.h> 5#include <linux/cache.h>
7#include <linux/slab.h> 6#include <linux/slab.h>
@@ -9,6 +8,7 @@
9#include <linux/tcp.h> 8#include <linux/tcp.h>
10#include <linux/hash.h> 9#include <linux/hash.h>
11#include <linux/tcp_metrics.h> 10#include <linux/tcp_metrics.h>
11#include <linux/vmalloc.h>
12 12
13#include <net/inet_connection_sock.h> 13#include <net/inet_connection_sock.h>
14#include <net/net_namespace.h> 14#include <net/net_namespace.h>
@@ -864,7 +864,7 @@ static int parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr,
864 } 864 }
865 a = info->attrs[TCP_METRICS_ATTR_ADDR_IPV6]; 865 a = info->attrs[TCP_METRICS_ATTR_ADDR_IPV6];
866 if (a) { 866 if (a) {
867 if (nla_len(a) != sizeof(sizeof(struct in6_addr))) 867 if (nla_len(a) != sizeof(struct in6_addr))
868 return -EINVAL; 868 return -EINVAL;
869 addr->family = AF_INET6; 869 addr->family = AF_INET6;
870 memcpy(addr->addr.a6, nla_data(a), sizeof(addr->addr.a6)); 870 memcpy(addr->addr.a6, nla_data(a), sizeof(addr->addr.a6));
@@ -1034,7 +1034,10 @@ static int __net_init tcp_net_metrics_init(struct net *net)
1034 net->ipv4.tcp_metrics_hash_log = order_base_2(slots); 1034 net->ipv4.tcp_metrics_hash_log = order_base_2(slots);
1035 size = sizeof(struct tcpm_hash_bucket) << net->ipv4.tcp_metrics_hash_log; 1035 size = sizeof(struct tcpm_hash_bucket) << net->ipv4.tcp_metrics_hash_log;
1036 1036
1037 net->ipv4.tcp_metrics_hash = kzalloc(size, GFP_KERNEL); 1037 net->ipv4.tcp_metrics_hash = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
1038 if (!net->ipv4.tcp_metrics_hash)
1039 net->ipv4.tcp_metrics_hash = vzalloc(size);
1040
1038 if (!net->ipv4.tcp_metrics_hash) 1041 if (!net->ipv4.tcp_metrics_hash)
1039 return -ENOMEM; 1042 return -ENOMEM;
1040 1043
@@ -1055,7 +1058,10 @@ static void __net_exit tcp_net_metrics_exit(struct net *net)
1055 tm = next; 1058 tm = next;
1056 } 1059 }
1057 } 1060 }
1058 kfree(net->ipv4.tcp_metrics_hash); 1061 if (is_vmalloc_addr(net->ipv4.tcp_metrics_hash))
1062 vfree(net->ipv4.tcp_metrics_hash);
1063 else
1064 kfree(net->ipv4.tcp_metrics_hash);
1059} 1065}
1060 1066
1061static __net_initdata struct pernet_operations tcp_net_metrics_ops = { 1067static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 27536ba16c9d..a7302d974f32 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -510,6 +510,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
510 newtp->rx_opt.mss_clamp = req->mss; 510 newtp->rx_opt.mss_clamp = req->mss;
511 TCP_ECN_openreq_child(newtp, req); 511 TCP_ECN_openreq_child(newtp, req);
512 newtp->fastopen_rsk = NULL; 512 newtp->fastopen_rsk = NULL;
513 newtp->syn_data_acked = 0;
513 514
514 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS); 515 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS);
515 } 516 }
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index cfe6ffe1c177..948ac275b9b5 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1986,6 +1986,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1986 tso_segs = tcp_init_tso_segs(sk, skb, mss_now); 1986 tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
1987 BUG_ON(!tso_segs); 1987 BUG_ON(!tso_segs);
1988 1988
1989 if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE)
1990 goto repair; /* Skip network transmission */
1991
1989 cwnd_quota = tcp_cwnd_test(tp, skb); 1992 cwnd_quota = tcp_cwnd_test(tp, skb);
1990 if (!cwnd_quota) 1993 if (!cwnd_quota)
1991 break; 1994 break;
@@ -2026,6 +2029,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
2026 if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp))) 2029 if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
2027 break; 2030 break;
2028 2031
2032repair:
2029 /* Advance the send_head. This one is sent out. 2033 /* Advance the send_head. This one is sent out.
2030 * This call will increment packets_out. 2034 * This call will increment packets_out.
2031 */ 2035 */
@@ -2305,12 +2309,11 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2305 * state updates are done by the caller. Returns non-zero if an 2309 * state updates are done by the caller. Returns non-zero if an
2306 * error occurred which prevented the send. 2310 * error occurred which prevented the send.
2307 */ 2311 */
2308int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) 2312int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2309{ 2313{
2310 struct tcp_sock *tp = tcp_sk(sk); 2314 struct tcp_sock *tp = tcp_sk(sk);
2311 struct inet_connection_sock *icsk = inet_csk(sk); 2315 struct inet_connection_sock *icsk = inet_csk(sk);
2312 unsigned int cur_mss; 2316 unsigned int cur_mss;
2313 int err;
2314 2317
2315 /* Inconslusive MTU probe */ 2318 /* Inconslusive MTU probe */
2316 if (icsk->icsk_mtup.probe_size) { 2319 if (icsk->icsk_mtup.probe_size) {
@@ -2383,11 +2386,17 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2383 if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) { 2386 if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) {
2384 struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER, 2387 struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
2385 GFP_ATOMIC); 2388 GFP_ATOMIC);
2386 err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : 2389 return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
2387 -ENOBUFS; 2390 -ENOBUFS;
2388 } else { 2391 } else {
2389 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); 2392 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2390 } 2393 }
2394}
2395
2396int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2397{
2398 struct tcp_sock *tp = tcp_sk(sk);
2399 int err = __tcp_retransmit_skb(sk, skb);
2391 2400
2392 if (err == 0) { 2401 if (err == 0) {
2393 /* Update global TCP statistics. */ 2402 /* Update global TCP statistics. */
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index fc04711e80c8..d47c1b4421a3 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -347,8 +347,8 @@ void tcp_retransmit_timer(struct sock *sk)
347 return; 347 return;
348 } 348 }
349 if (tp->fastopen_rsk) { 349 if (tp->fastopen_rsk) {
350 BUG_ON(sk->sk_state != TCP_SYN_RECV && 350 WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
351 sk->sk_state != TCP_FIN_WAIT1); 351 sk->sk_state != TCP_FIN_WAIT1);
352 tcp_fastopen_synack_timer(sk); 352 tcp_fastopen_synack_timer(sk);
353 /* Before we receive ACK to our SYN-ACK don't retransmit 353 /* Before we receive ACK to our SYN-ACK don't retransmit
354 * anything else (e.g., data or FIN segments). 354 * anything else (e.g., data or FIN segments).
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 05c5ab8d983c..3be0ac2c1920 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -279,19 +279,8 @@ static void __exit xfrm4_policy_fini(void)
279 xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo); 279 xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo);
280} 280}
281 281
282void __init xfrm4_init(int rt_max_size) 282void __init xfrm4_init(void)
283{ 283{
284 /*
285 * Select a default value for the gc_thresh based on the main route
286 * table hash size. It seems to me the worst case scenario is when
287 * we have ipsec operating in transport mode, in which we create a
288 * dst_entry per socket. The xfrm gc algorithm starts trying to remove
289 * entries at gc_thresh, and prevents new allocations as 2*gc_thresh
290 * so lets set an initial xfrm gc_thresh value at the rt_max_size/2.
291 * That will let us store an ipsec connection per route table entry,
292 * and start cleaning when were 1/2 full
293 */
294 xfrm4_dst_ops.gc_thresh = rt_max_size/2;
295 dst_entries_init(&xfrm4_dst_ops); 284 dst_entries_init(&xfrm4_dst_ops);
296 285
297 xfrm4_state_init(); 286 xfrm4_state_init();