path: root/net/ipv4/tcp_ipv4.c
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r--  net/ipv4/tcp_ipv4.c  181
1 file changed, 99 insertions(+), 82 deletions(-)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c8d28c433b2b..42b2a6a73092 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -209,22 +209,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
         }
 
         if (tcp_death_row.sysctl_tw_recycle &&
-            !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
-                struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
-                /*
-                 * VJ's idea. We save last timestamp seen from
-                 * the destination in peer table, when entering state
-                 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
-                 * when trying new connection.
-                 */
-                if (peer) {
-                        inet_peer_refcheck(peer);
-                        if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
-                                tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
-                                tp->rx_opt.ts_recent = peer->tcp_ts;
-                        }
-                }
-        }
+            !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
+                tcp_fetch_timewait_stamp(sk, &rt->dst);
 
         inet->inet_dport = usin->sin_port;
         inet->inet_daddr = daddr;
@@ -289,12 +275,15 @@ failure:
 EXPORT_SYMBOL(tcp_v4_connect);
 
 /*
- * This routine does path mtu discovery as defined in RFC1191.
+ * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
+ * It can be called through tcp_release_cb() if socket was owned by user
+ * at the time tcp_v4_err() was called to handle ICMP message.
  */
-static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
+static void tcp_v4_mtu_reduced(struct sock *sk)
 {
         struct dst_entry *dst;
         struct inet_sock *inet = inet_sk(sk);
+        u32 mtu = tcp_sk(sk)->mtu_info;
 
         /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
          * send out by Linux are always <576bytes so they should go through
@@ -303,17 +292,10 @@ static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
         if (sk->sk_state == TCP_LISTEN)
                 return;
 
-        /* We don't check in the destentry if pmtu discovery is forbidden
-         * on this route. We just assume that no packet_to_big packets
-         * are send back when pmtu discovery is not active.
-         * There is a small race when the user changes this flag in the
-         * route, but I think that's acceptable.
-         */
-        if ((dst = __sk_dst_check(sk, 0)) == NULL)
+        dst = inet_csk_update_pmtu(sk, mtu);
+        if (!dst)
                 return;
 
-        dst->ops->update_pmtu(dst, mtu);
-
         /* Something is about to be wrong... Remember soft error
          * for the case, if this connection will not able to recover.
          */
@@ -335,6 +317,14 @@ static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
         } /* else let the usual retransmit timer handle it */
 }
 
+static void do_redirect(struct sk_buff *skb, struct sock *sk)
+{
+        struct dst_entry *dst = __sk_dst_check(sk, 0);
+
+        if (dst)
+                dst->ops->redirect(dst, sk, skb);
+}
+
 /*
  * This routine is called by the ICMP module when it gets some
  * sort of error condition. If err < 0 then the socket should
@@ -386,8 +376,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
         bh_lock_sock(sk);
         /* If too many ICMPs get dropped on busy
          * servers this needs to be solved differently.
+         * We do take care of PMTU discovery (RFC1191) special case :
+         * we can receive locally generated ICMP messages while socket is held.
          */
-        if (sock_owned_by_user(sk))
+        if (sock_owned_by_user(sk) &&
+            type != ICMP_DEST_UNREACH &&
+            code != ICMP_FRAG_NEEDED)
                 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
 
         if (sk->sk_state == TCP_CLOSE)
@@ -408,6 +402,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
         }
 
         switch (type) {
+        case ICMP_REDIRECT:
+                do_redirect(icmp_skb, sk);
+                goto out;
         case ICMP_SOURCE_QUENCH:
                 /* Just silently ignore these. */
                 goto out;
@@ -419,8 +416,11 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
                         goto out;
 
                 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
+                        tp->mtu_info = info;
                         if (!sock_owned_by_user(sk))
-                                do_pmtu_discovery(sk, iph, info);
+                                tcp_v4_mtu_reduced(sk);
+                        else
+                                set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags);
                         goto out;
                 }
 
@@ -698,8 +698,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 
         net = dev_net(skb_dst(skb)->dev);
         arg.tos = ip_hdr(skb)->tos;
-        ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
-                      &arg, arg.iov[0].iov_len);
+        ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
+                              ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
 
         TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
         TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
@@ -781,8 +781,8 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
         if (oif)
                 arg.bound_dev_if = oif;
         arg.tos = tos;
-        ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
-                      &arg, arg.iov[0].iov_len);
+        ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
+                              ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
 
         TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 }
@@ -825,7 +825,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
                               struct request_sock *req,
                               struct request_values *rvp,
-                              u16 queue_mapping)
+                              u16 queue_mapping,
+                              bool nocache)
 {
         const struct inet_request_sock *ireq = inet_rsk(req);
         struct flowi4 fl4;
@@ -848,7 +849,6 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
                 err = net_xmit_eval(err);
         }
 
-        dst_release(dst);
         return err;
 }
 
@@ -856,7 +856,7 @@ static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
                               struct request_values *rvp)
 {
         TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
-        return tcp_v4_send_synack(sk, NULL, req, rvp, 0);
+        return tcp_v4_send_synack(sk, NULL, req, rvp, 0, false);
 }
 
 /*
@@ -1317,7 +1317,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
         tcp_clear_options(&tmp_opt);
         tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
         tmp_opt.user_mss = tp->rx_opt.user_mss;
-        tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
+        tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
 
         if (tmp_opt.cookie_plus > 0 &&
             tmp_opt.saw_tstamp &&
@@ -1375,7 +1375,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
                 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
                 req->cookie_ts = tmp_opt.tstamp_ok;
         } else if (!isn) {
-                struct inet_peer *peer = NULL;
                 struct flowi4 fl4;
 
                 /* VJ's idea. We save last timestamp seen
@@ -1390,12 +1389,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
                 if (tmp_opt.saw_tstamp &&
                     tcp_death_row.sysctl_tw_recycle &&
                     (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
-                    fl4.daddr == saddr &&
-                    (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) {
-                        inet_peer_refcheck(peer);
-                        if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
-                            (s32)(peer->tcp_ts - req->ts_recent) >
-                                                        TCP_PAWS_WINDOW) {
+                    fl4.daddr == saddr) {
+                        if (!tcp_peer_is_proven(req, dst, true)) {
                                 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
                                 goto drop_and_release;
                         }
@@ -1404,8 +1399,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
                 else if (!sysctl_tcp_syncookies &&
                          (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
                           (sysctl_max_syn_backlog >> 2)) &&
-                         (!peer || !peer->tcp_ts_stamp) &&
-                         (!dst || !dst_metric(dst, RTAX_RTT))) {
+                         !tcp_peer_is_proven(req, dst, false)) {
                         /* Without syncookies last quarter of
                          * backlog is filled with destinations,
                          * proven to be alive.
@@ -1425,7 +1419,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 
         if (tcp_v4_send_synack(sk, dst, req,
                                (struct request_values *)&tmp_ext,
-                               skb_get_queue_mapping(skb)) ||
+                               skb_get_queue_mapping(skb),
+                               want_cookie) ||
             want_cookie)
                 goto drop_and_free;
 
@@ -1622,7 +1617,19 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 #endif
 
         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
+                struct dst_entry *dst = sk->sk_rx_dst;
+
                 sock_rps_save_rxhash(sk, skb);
+                if (dst) {
+                        if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
+                            dst->ops->check(dst, 0) == NULL) {
+                                dst_release(dst);
+                                sk->sk_rx_dst = NULL;
+                        }
+                }
+                if (unlikely(sk->sk_rx_dst == NULL))
+                        inet_sk_rx_dst_set(sk, skb);
+
                 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
                         rsk = sk;
                         goto reset;
@@ -1672,6 +1679,44 @@ csum_err:
 }
 EXPORT_SYMBOL(tcp_v4_do_rcv);
 
+void tcp_v4_early_demux(struct sk_buff *skb)
+{
+        struct net *net = dev_net(skb->dev);
+        const struct iphdr *iph;
+        const struct tcphdr *th;
+        struct sock *sk;
+
+        if (skb->pkt_type != PACKET_HOST)
+                return;
+
+        if (!pskb_may_pull(skb, ip_hdrlen(skb) + sizeof(struct tcphdr)))
+                return;
+
+        iph = ip_hdr(skb);
+        th = (struct tcphdr *) ((char *)iph + ip_hdrlen(skb));
+
+        if (th->doff < sizeof(struct tcphdr) / 4)
+                return;
+
+        sk = __inet_lookup_established(net, &tcp_hashinfo,
+                                       iph->saddr, th->source,
+                                       iph->daddr, ntohs(th->dest),
+                                       skb->skb_iif);
+        if (sk) {
+                skb->sk = sk;
+                skb->destructor = sock_edemux;
+                if (sk->sk_state != TCP_TIME_WAIT) {
+                        struct dst_entry *dst = sk->sk_rx_dst;
+
+                        if (dst)
+                                dst = dst_check(dst, 0);
+                        if (dst &&
+                            inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
+                                skb_dst_set_noref(skb, dst);
+                }
+        }
+}
+
 /*
  * From tcp_input.c
  */
@@ -1821,40 +1866,10 @@ do_time_wait:
         goto discard_it;
 }
 
-struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
-{
-        struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
-        struct inet_sock *inet = inet_sk(sk);
-        struct inet_peer *peer;
-
-        if (!rt ||
-            inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
-                peer = inet_getpeer_v4(inet->inet_daddr, 1);
-                *release_it = true;
-        } else {
-                if (!rt->peer)
-                        rt_bind_peer(rt, inet->inet_daddr, 1);
-                peer = rt->peer;
-                *release_it = false;
-        }
-
-        return peer;
-}
-EXPORT_SYMBOL(tcp_v4_get_peer);
-
-void *tcp_v4_tw_get_peer(struct sock *sk)
-{
-        const struct inet_timewait_sock *tw = inet_twsk(sk);
-
-        return inet_getpeer_v4(tw->tw_daddr, 1);
-}
-EXPORT_SYMBOL(tcp_v4_tw_get_peer);
-
 static struct timewait_sock_ops tcp_timewait_sock_ops = {
         .twsk_obj_size = sizeof(struct tcp_timewait_sock),
         .twsk_unique = tcp_twsk_unique,
         .twsk_destructor= tcp_twsk_destructor,
-        .twsk_getpeer = tcp_v4_tw_get_peer,
 };
 
 const struct inet_connection_sock_af_ops ipv4_specific = {
@@ -1863,7 +1878,6 @@ const struct inet_connection_sock_af_ops ipv4_specific = {
         .rebuild_header = inet_sk_rebuild_header,
         .conn_request = tcp_v4_conn_request,
         .syn_recv_sock = tcp_v4_syn_recv_sock,
-        .get_peer = tcp_v4_get_peer,
         .net_header_len = sizeof(struct iphdr),
         .setsockopt = ip_setsockopt,
         .getsockopt = ip_getsockopt,
@@ -1953,6 +1967,9 @@ void tcp_v4_destroy_sock(struct sock *sk)
                 tp->cookie_values = NULL;
         }
 
+        /* If socket is aborted during connect operation */
+        tcp_free_fastopen_req(tp);
+
         sk_sockets_allocated_dec(sk);
         sock_release_memcg(sk);
 }
@@ -2593,6 +2610,8 @@ struct proto tcp_prot = {
         .sendmsg = tcp_sendmsg,
         .sendpage = tcp_sendpage,
         .backlog_rcv = tcp_v4_do_rcv,
+        .release_cb = tcp_release_cb,
+        .mtu_reduced = tcp_v4_mtu_reduced,
         .hash = inet_hash,
         .unhash = inet_unhash,
         .get_port = inet_csk_get_port,
@@ -2614,7 +2633,7 @@ struct proto tcp_prot = {
         .compat_setsockopt = compat_tcp_setsockopt,
         .compat_getsockopt = compat_tcp_getsockopt,
 #endif
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
         .init_cgroup = tcp_init_cgroup,
         .destroy_cgroup = tcp_destroy_cgroup,
         .proto_cgroup = tcp_proto_cgroup,
@@ -2624,13 +2643,11 @@ EXPORT_SYMBOL(tcp_prot);
 
 static int __net_init tcp_sk_init(struct net *net)
 {
-        return inet_ctl_sock_create(&net->ipv4.tcp_sock,
-                                    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
+        return 0;
 }
 
 static void __net_exit tcp_sk_exit(struct net *net)
 {
-        inet_ctl_sock_destroy(net->ipv4.tcp_sock);
 }
 
 static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)