path: root/net/ipv4/tcp_ipv4.c
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r--	net/ipv4/tcp_ipv4.c	261
1 file changed, 174 insertions(+), 87 deletions(-)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 65b8ebfd078a..020766292bb0 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -60,6 +60,7 @@
 #include <linux/jhash.h>
 #include <linux/init.h>
 #include <linux/times.h>
+#include <linux/slab.h>

 #include <net/net_namespace.h>
 #include <net/icmp.h>
@@ -83,6 +84,7 @@

 int sysctl_tcp_tw_reuse __read_mostly;
 int sysctl_tcp_low_latency __read_mostly;
+EXPORT_SYMBOL(sysctl_tcp_low_latency);


 #ifdef CONFIG_TCP_MD5SIG
@@ -99,6 +101,7 @@ struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
 #endif

 struct inet_hashinfo tcp_hashinfo;
+EXPORT_SYMBOL(tcp_hashinfo);

 static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
 {
@@ -138,7 +141,6 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)

 	return 0;
 }
-
 EXPORT_SYMBOL_GPL(tcp_twsk_unique);

 /* This will initiate an outgoing connection. */
@@ -203,10 +205,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
 		 * when trying new connection.
 		 */
-		if (peer != NULL &&
-		    (u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
-			tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
-			tp->rx_opt.ts_recent = peer->tcp_ts;
+		if (peer) {
+			inet_peer_refcheck(peer);
+			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
+				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
+				tp->rx_opt.ts_recent = peer->tcp_ts;
+			}
 		}
 	}

@@ -236,7 +240,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)

 	/* OK, now commit destination to socket. */
 	sk->sk_gso_type = SKB_GSO_TCPV4;
-	sk_setup_caps(sk, &rt->u.dst);
+	sk_setup_caps(sk, &rt->dst);

 	if (!tp->write_seq)
 		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
@@ -264,6 +268,7 @@ failure:
 	inet->inet_dport = 0;
 	return err;
 }
+EXPORT_SYMBOL(tcp_v4_connect);

 /*
  * This routine does path mtu discovery as defined in RFC1191.
@@ -370,6 +375,11 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 	if (sk->sk_state == TCP_CLOSE)
 		goto out;

+	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
+		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
+		goto out;
+	}
+
 	icsk = inet_csk(sk);
 	tp = tcp_sk(sk);
 	seq = ntohl(th->seq);
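The hunk above hooks the socket's minimum-TTL check into the ICMP error handler. The per-socket floor it consults (inet_sk(sk)->min_ttl) is the value user space configures with the IP_MINTTL socket option (RFC 5082 generalized TTL security). A minimal user-space sketch, not part of this patch and using a placeholder port, might look like:

/* Illustrative only: enable the min-TTL check the hunk above consults.
 * IP_MINTTL asks the kernel to drop TCP segments whose IP TTL is below
 * the given floor, per RFC 5082 (GTSM).
 */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef IP_MINTTL
#define IP_MINTTL 21	/* from <linux/in.h>; older libcs may not define it */
#endif

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	int minttl = 255;	/* only accept peers that are one hop away */
	struct sockaddr_in addr;

	if (fd < 0)
		return 1;
	if (setsockopt(fd, IPPROTO_IP, IP_MINTTL, &minttl, sizeof(minttl)) < 0)
		perror("IP_MINTTL");	/* needs a kernel with this feature */

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_port = htons(17900);	/* placeholder port */
	addr.sin_addr.s_addr = htonl(INADDR_ANY);
	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) == 0)
		listen(fd, 16);
	close(fd);
	return 0;
}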
@@ -513,26 +523,32 @@ out:
 	sock_put(sk);
 }

-/* This routine computes an IPv4 TCP checksum. */
-void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
+static void __tcp_v4_send_check(struct sk_buff *skb,
+				__be32 saddr, __be32 daddr)
 {
-	struct inet_sock *inet = inet_sk(sk);
 	struct tcphdr *th = tcp_hdr(skb);

 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		th->check = ~tcp_v4_check(len, inet->inet_saddr,
-					  inet->inet_daddr, 0);
+		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
 		skb->csum_start = skb_transport_header(skb) - skb->head;
 		skb->csum_offset = offsetof(struct tcphdr, check);
 	} else {
-		th->check = tcp_v4_check(len, inet->inet_saddr,
-					 inet->inet_daddr,
+		th->check = tcp_v4_check(skb->len, saddr, daddr,
 					 csum_partial(th,
 						      th->doff << 2,
 						      skb->csum));
 	}
 }

+/* This routine computes an IPv4 TCP checksum. */
+void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
+{
+	struct inet_sock *inet = inet_sk(sk);
+
+	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
+}
+EXPORT_SYMBOL(tcp_v4_send_check);
+
 int tcp_v4_gso_send_check(struct sk_buff *skb)
 {
 	const struct iphdr *iph;
@@ -545,10 +561,8 @@ int tcp_v4_gso_send_check(struct sk_buff *skb)
 	th = tcp_hdr(skb);

 	th->check = 0;
-	th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0);
-	skb->csum_start = skb_transport_header(skb) - skb->head;
-	skb->csum_offset = offsetof(struct tcphdr, check);
 	skb->ip_summed = CHECKSUM_PARTIAL;
+	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
 	return 0;
 }

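The two hunks above funnel every IPv4 TCP checksum call site through __tcp_v4_send_check(), which either seeds the pseudo-header sum for hardware offload (CHECKSUM_PARTIAL) or folds the full sum in software via csum_partial(). As a rough, stand-alone illustration of the arithmetic tcp_v4_check() performs, a 16-bit one's-complement sum over the IPv4 pseudo-header followed by the TCP segment (RFC 793/RFC 1071), with invented addresses and a zeroed header:

/* User-space sketch only; field layout mirrors the pseudo-header,
 * not kernel code. */
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t sum16(const void *buf, size_t len, uint32_t sum)
{
	const uint8_t *p = buf;

	while (len > 1) {
		sum += (uint32_t)p[0] << 8 | p[1];
		p += 2;
		len -= 2;
	}
	if (len)			/* odd trailing byte, padded with zero */
		sum += (uint32_t)p[0] << 8;
	return sum;
}

static uint16_t tcp_checksum(uint32_t saddr, uint32_t daddr,
			     const void *seg, size_t len)
{
	uint8_t pseudo[12];
	uint32_t sum;

	/* Pseudo-header: saddr, daddr, zero, protocol (6), TCP length. */
	memcpy(pseudo, &saddr, 4);
	memcpy(pseudo + 4, &daddr, 4);
	pseudo[8] = 0;
	pseudo[9] = 6;
	pseudo[10] = len >> 8;
	pseudo[11] = len & 0xff;

	sum = sum16(pseudo, sizeof(pseudo), 0);
	sum = sum16(seg, len, sum);
	while (sum >> 16)		/* fold carries back into 16 bits */
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	uint8_t segment[20] = { 0 };	/* a zeroed 20-byte TCP header */

	printf("checksum = 0x%04x\n",
	       tcp_checksum(inet_addr("192.0.2.1"), inet_addr("192.0.2.2"),
			    segment, sizeof(segment)));
	return 0;
}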
@@ -742,9 +756,9 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
  * This still operates on a request_sock only, not on a big
  * socket.
  */
-static int __tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
-				struct request_sock *req,
-				struct request_values *rvp)
+static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
+			      struct request_sock *req,
+			      struct request_values *rvp)
 {
 	const struct inet_request_sock *ireq = inet_rsk(req);
 	int err = -1;
@@ -757,13 +771,7 @@ static int __tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 	skb = tcp_make_synack(sk, dst, req, rvp);

 	if (skb) {
-		struct tcphdr *th = tcp_hdr(skb);
-
-		th->check = tcp_v4_check(skb->len,
-					 ireq->loc_addr,
-					 ireq->rmt_addr,
-					 csum_partial(th, skb->len,
-						      skb->csum));
+		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);

 		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
 					    ireq->rmt_addr,
@@ -775,10 +783,11 @@ static int __tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 	return err;
 }

-static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
-			      struct request_values *rvp)
+static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
+			     struct request_values *rvp)
 {
-	return __tcp_v4_send_synack(sk, NULL, req, rvp);
+	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
+	return tcp_v4_send_synack(sk, NULL, req, rvp);
 }

 /*
@@ -789,19 +798,20 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
 	kfree(inet_rsk(req)->opt);
 }

-#ifdef CONFIG_SYN_COOKIES
-static void syn_flood_warning(struct sk_buff *skb)
+static void syn_flood_warning(const struct sk_buff *skb)
 {
-	static unsigned long warntime;
+	const char *msg;

-	if (time_after(jiffies, (warntime + HZ * 60))) {
-		warntime = jiffies;
-		printk(KERN_INFO
-		       "possible SYN flooding on port %d. Sending cookies.\n",
-		       ntohs(tcp_hdr(skb)->dest));
-	}
-}
+#ifdef CONFIG_SYN_COOKIES
+	if (sysctl_tcp_syncookies)
+		msg = "Sending cookies";
+	else
 #endif
+		msg = "Dropping request";
+
+	pr_info("TCP: Possible SYN flooding on port %d. %s.\n",
+		ntohs(tcp_hdr(skb)->dest), msg);
+}

 /*
  * Save and compile IPv4 options into the request_sock if needed.
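The rewritten syn_flood_warning() above drops its private once-a-minute throttle (the static warntime/jiffies test) because a later hunk in this diff gates the call with net_ratelimit(), and the message now states whether cookies are being sent or the request is dropped. A small user-space sketch of the time-based throttling the old code implemented, using a monotonic clock in place of jiffies (illustrative only):

#include <stdio.h>
#include <time.h>

static int warn_ratelimited(const char *msg, int port)
{
	static time_t last_warn;	/* like the old 'static unsigned long warntime' */
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);
	if (last_warn && now.tv_sec - last_warn < 60)
		return 0;		/* suppressed, like a failed net_ratelimit() */
	last_warn = now.tv_sec;
	printf("TCP: Possible SYN flooding on port %d. %s.\n", port, msg);
	return 1;
}

int main(void)
{
	for (int i = 0; i < 3; i++)
		warn_ratelimited("Sending cookies", 80);	/* only the first prints */
	return 0;
}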
@@ -853,7 +863,6 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
 {
 	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
 }
-
 EXPORT_SYMBOL(tcp_v4_md5_lookup);

 static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
@@ -887,7 +896,7 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
 			kfree(newkey);
 			return -ENOMEM;
 		}
-		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
 	}
 	if (tcp_alloc_md5sig_pool(sk) == NULL) {
 		kfree(newkey);
@@ -920,7 +929,6 @@
 	}
 	return 0;
 }
-
 EXPORT_SYMBOL(tcp_v4_md5_do_add);

 static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
@@ -958,7 +966,6 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
 	}
 	return -ENOENT;
 }
-
 EXPORT_SYMBOL(tcp_v4_md5_do_del);

 static void tcp_v4_clear_md5_list(struct sock *sk)
@@ -1017,7 +1024,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
 			return -EINVAL;

 		tp->md5sig_info = p;
-		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
 	}

 	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
@@ -1131,7 +1138,6 @@ clear_hash_noput:
 	memset(md5_hash, 0, 16);
 	return 1;
 }
-
 EXPORT_SYMBOL(tcp_v4_md5_hash_skb);

 static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
@@ -1192,10 +1198,11 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
 struct request_sock_ops tcp_request_sock_ops __read_mostly = {
 	.family		=	PF_INET,
 	.obj_size	=	sizeof(struct tcp_request_sock),
-	.rtx_syn_ack	=	tcp_v4_send_synack,
+	.rtx_syn_ack	=	tcp_v4_rtx_synack,
 	.send_ack	=	tcp_v4_reqsk_send_ack,
 	.destructor	=	tcp_v4_reqsk_destructor,
 	.send_reset	=	tcp_v4_send_reset,
+	.syn_ack_timeout =	tcp_syn_ack_timeout,
 };

 #ifdef CONFIG_TCP_MD5SIG
@@ -1238,6 +1245,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	 * evidently real one.
 	 */
 	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
+		if (net_ratelimit())
+			syn_flood_warning(skb);
 #ifdef CONFIG_SYN_COOKIES
 		if (sysctl_tcp_syncookies) {
 			want_cookie = 1;
@@ -1281,8 +1290,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 			goto drop_and_release;

 		/* Secret recipe starts with IP addresses */
-		*mess++ ^= daddr;
-		*mess++ ^= saddr;
+		*mess++ ^= (__force u32)daddr;
+		*mess++ ^= (__force u32)saddr;

 		/* plus variable length Initiator Cookie */
 		c = (u8 *)mess;
@@ -1318,15 +1327,12 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (security_inet_conn_request(sk, skb, req))
 		goto drop_and_free;

-	if (!want_cookie)
+	if (!want_cookie || tmp_opt.tstamp_ok)
 		TCP_ECN_create_request(req, tcp_hdr(skb));

 	if (want_cookie) {
-#ifdef CONFIG_SYN_COOKIES
-		syn_flood_warning(skb);
-		req->cookie_ts = tmp_opt.tstamp_ok;
-#endif
 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
+		req->cookie_ts = tmp_opt.tstamp_ok;
 	} else if (!isn) {
 		struct inet_peer *peer = NULL;

@@ -1344,6 +1350,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		    (dst = inet_csk_route_req(sk, req)) != NULL &&
 		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
 		    peer->v4daddr == saddr) {
+			inet_peer_refcheck(peer);
 			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
 			    (s32)(peer->tcp_ts - req->ts_recent) >
 							TCP_PAWS_WINDOW) {
@@ -1373,8 +1380,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	}
 	tcp_rsk(req)->snt_isn = isn;

-	if (__tcp_v4_send_synack(sk, dst, req,
-				 (struct request_values *)&tmp_ext) ||
+	if (tcp_v4_send_synack(sk, dst, req,
+			       (struct request_values *)&tmp_ext) ||
 	    want_cookie)
 		goto drop_and_free;

@@ -1388,6 +1395,7 @@ drop_and_free:
 drop:
 	return 0;
 }
+EXPORT_SYMBOL(tcp_v4_conn_request);


 /*
@@ -1457,7 +1465,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		if (newkey != NULL)
 			tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
 					  newkey, key->keylen);
-		newsk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
 	}
 #endif

@@ -1473,6 +1481,7 @@ exit:
 	dst_release(dst);
 	return NULL;
 }
+EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 {
@@ -1499,7 +1508,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 	}

 #ifdef CONFIG_SYN_COOKIES
-	if (!th->rst && !th->syn && th->ack)
+	if (!th->syn)
 		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
 #endif
 	return sk;
@@ -1550,6 +1559,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 #endif

 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
+		sock_rps_save_rxhash(sk, skb->rxhash);
 		TCP_CHECK_TIMER(sk);
 		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
 			rsk = sk;
@@ -1574,7 +1584,9 @@
 			}
 			return 0;
 		}
-	}
+	} else
+		sock_rps_save_rxhash(sk, skb->rxhash);
+


 	TCP_CHECK_TIMER(sk);
 	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
@@ -1599,6 +1611,7 @@ csum_err:
 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
 	goto discard;
 }
+EXPORT_SYMBOL(tcp_v4_do_rcv);

 /*
  *	From tcp_input.c
@@ -1653,6 +1666,11 @@ process:
 	if (sk->sk_state == TCP_TIME_WAIT)
 		goto do_time_wait;

+	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
+		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
+		goto discard_and_relse;
+	}
+
 	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
 		goto discard_and_relse;
 	nf_reset(skb);
@@ -1677,8 +1695,11 @@ process:
 			if (!tcp_prequeue(sk, skb))
 				ret = tcp_v4_do_rcv(sk, skb);
 		}
-	} else
-		sk_add_backlog(sk, skb);
+	} else if (unlikely(sk_add_backlog(sk, skb))) {
+		bh_unlock_sock(sk);
+		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
+		goto discard_and_relse;
+	}
 	bh_unlock_sock(sk);

 	sock_put(sk);
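In the hunk above, sk_add_backlog() can now fail when the socket owner is busy and the backlog has grown past its budget; the segment is then dropped and accounted under LINUX_MIB_TCPBACKLOGDROP instead of being queued without bound. A toy sketch of that bounded-backlog pattern, with invented names and limits:

#include <stdio.h>
#include <stdlib.h>

struct backlog {
	size_t used;		/* bytes currently queued */
	size_t limit;		/* analogous to the socket receive-buffer budget */
	unsigned long drops;	/* analogous to LINUX_MIB_TCPBACKLOGDROP */
};

static int backlog_add(struct backlog *b, size_t len)
{
	if (b->used + len > b->limit) {
		b->drops++;
		return -1;	/* caller must drop the packet */
	}
	b->used += len;
	return 0;
}

int main(void)
{
	struct backlog b = { .used = 0, .limit = 4096, .drops = 0 };

	for (int i = 0; i < 10; i++)
		if (backlog_add(&b, 1500))
			printf("packet %d dropped (backlog full)\n", i);
	printf("queued %zu bytes, dropped %lu packets\n", b.used, b.drops);
	return 0;
}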
@@ -1777,6 +1798,7 @@ int tcp_v4_remember_stamp(struct sock *sk)

 	return 0;
 }
+EXPORT_SYMBOL(tcp_v4_remember_stamp);

 int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
 {
@@ -1816,6 +1838,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = {
 	.compat_getsockopt = compat_ip_getsockopt,
 #endif
 };
+EXPORT_SYMBOL(ipv4_specific);

 #ifdef CONFIG_TCP_MD5SIG
 static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
@@ -1944,7 +1967,6 @@ void tcp_v4_destroy_sock(struct sock *sk)

 	percpu_counter_dec(&tcp_sockets_allocated);
 }
-
 EXPORT_SYMBOL(tcp_v4_destroy_sock);

 #ifdef CONFIG_PROC_FS
@@ -1962,6 +1984,11 @@ static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
 		hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
 }

+/*
+ * Get next listener socket follow cur.  If cur is NULL, get first socket
+ * starting from bucket given in st->bucket; when st->bucket is zero the
+ * very first socket in the hash table is returned.
+ */
 static void *listening_get_next(struct seq_file *seq, void *cur)
 {
 	struct inet_connection_sock *icsk;
@@ -1972,14 +1999,15 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
 	struct net *net = seq_file_net(seq);

 	if (!sk) {
-		st->bucket = 0;
-		ilb = &tcp_hashinfo.listening_hash[0];
+		ilb = &tcp_hashinfo.listening_hash[st->bucket];
 		spin_lock_bh(&ilb->lock);
 		sk = sk_nulls_head(&ilb->head);
+		st->offset = 0;
 		goto get_sk;
 	}
 	ilb = &tcp_hashinfo.listening_hash[st->bucket];
 	++st->num;
+	++st->offset;

 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
 		struct request_sock *req = cur;
@@ -1994,6 +2022,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
 			}
 			req = req->dl_next;
 		}
+		st->offset = 0;
 		if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
 			break;
 get_req:
@@ -2029,6 +2058,7 @@ start_req:
 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
 	}
 	spin_unlock_bh(&ilb->lock);
+	st->offset = 0;
 	if (++st->bucket < INET_LHTABLE_SIZE) {
 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
 		spin_lock_bh(&ilb->lock);
@@ -2042,7 +2072,12 @@ out:

 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
 {
-	void *rc = listening_get_next(seq, NULL);
+	struct tcp_iter_state *st = seq->private;
+	void *rc;
+
+	st->bucket = 0;
+	st->offset = 0;
+	rc = listening_get_next(seq, NULL);

 	while (rc && *pos) {
 		rc = listening_get_next(seq, rc);
@@ -2057,13 +2092,18 @@ static inline int empty_bucket(struct tcp_iter_state *st)
 		hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
 }

+/*
+ * Get first established socket starting from bucket given in st->bucket.
+ * If st->bucket is zero, the very first socket in the hash is returned.
+ */
 static void *established_get_first(struct seq_file *seq)
 {
 	struct tcp_iter_state *st = seq->private;
 	struct net *net = seq_file_net(seq);
 	void *rc = NULL;

-	for (st->bucket = 0; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
+	st->offset = 0;
+	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
 		struct sock *sk;
 		struct hlist_nulls_node *node;
 		struct inet_timewait_sock *tw;
@@ -2108,6 +2148,7 @@ static void *established_get_next(struct seq_file *seq, void *cur)
 	struct net *net = seq_file_net(seq);

 	++st->num;
+	++st->offset;

 	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
 		tw = cur;
@@ -2124,6 +2165,7 @@ get_tw:
 		st->state = TCP_SEQ_STATE_ESTABLISHED;

 		/* Look for next non empty bucket */
+		st->offset = 0;
 		while (++st->bucket <= tcp_hashinfo.ehash_mask &&
 				empty_bucket(st))
 			;
@@ -2151,7 +2193,11 @@ out:

 static void *established_get_idx(struct seq_file *seq, loff_t pos)
 {
-	void *rc = established_get_first(seq);
+	struct tcp_iter_state *st = seq->private;
+	void *rc;
+
+	st->bucket = 0;
+	rc = established_get_first(seq);

 	while (rc && pos) {
 		rc = established_get_next(seq, rc);
@@ -2176,24 +2222,72 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
 	return rc;
 }

+static void *tcp_seek_last_pos(struct seq_file *seq)
+{
+	struct tcp_iter_state *st = seq->private;
+	int offset = st->offset;
+	int orig_num = st->num;
+	void *rc = NULL;
+
+	switch (st->state) {
+	case TCP_SEQ_STATE_OPENREQ:
+	case TCP_SEQ_STATE_LISTENING:
+		if (st->bucket >= INET_LHTABLE_SIZE)
+			break;
+		st->state = TCP_SEQ_STATE_LISTENING;
+		rc = listening_get_next(seq, NULL);
+		while (offset-- && rc)
+			rc = listening_get_next(seq, rc);
+		if (rc)
+			break;
+		st->bucket = 0;
+		/* Fallthrough */
+	case TCP_SEQ_STATE_ESTABLISHED:
+	case TCP_SEQ_STATE_TIME_WAIT:
+		st->state = TCP_SEQ_STATE_ESTABLISHED;
+		if (st->bucket > tcp_hashinfo.ehash_mask)
+			break;
+		rc = established_get_first(seq);
+		while (offset-- && rc)
+			rc = established_get_next(seq, rc);
+	}
+
+	st->num = orig_num;
+
+	return rc;
+}
+
 static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	struct tcp_iter_state *st = seq->private;
+	void *rc;
+
+	if (*pos && *pos == st->last_pos) {
+		rc = tcp_seek_last_pos(seq);
+		if (rc)
+			goto out;
+	}
+
 	st->state = TCP_SEQ_STATE_LISTENING;
 	st->num = 0;
-	return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+	st->bucket = 0;
+	st->offset = 0;
+	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+
+out:
+	st->last_pos = *pos;
+	return rc;
 }

 static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
+	struct tcp_iter_state *st = seq->private;
 	void *rc = NULL;
-	struct tcp_iter_state *st;

 	if (v == SEQ_START_TOKEN) {
 		rc = tcp_get_idx(seq, 0);
 		goto out;
 	}
-	st = seq->private;

 	switch (st->state) {
 	case TCP_SEQ_STATE_OPENREQ:
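The seq_file changes above remember where the previous read() of /proc/net/tcp stopped (st->last_pos together with the hash bucket and in-bucket offset), so the next read resumes from the saved bucket via tcp_seek_last_pos() instead of re-walking the whole table from entry zero. A toy user-space sketch of resuming iteration from a saved (bucket, offset), with made-up data and sizes:

#include <stdio.h>

#define NBUCKETS 4

static const char *table[NBUCKETS][3] = {
	{ "sock-a", "sock-b", NULL },
	{ NULL },
	{ "sock-c", NULL },
	{ "sock-d", "sock-e", NULL },
};

struct iter_state {		/* analogous to tcp_iter_state */
	int bucket;
	int offset;
};

/* Emit up to 'batch' entries, starting where the previous call stopped. */
static int dump_batch(struct iter_state *st, int batch)
{
	int emitted = 0;

	for (; st->bucket < NBUCKETS; st->bucket++, st->offset = 0) {
		while (table[st->bucket][st->offset]) {
			if (emitted == batch)
				return emitted;	/* resume here next time */
			printf("%s\n", table[st->bucket][st->offset]);
			st->offset++;
			emitted++;
		}
	}
	return emitted;
}

int main(void)
{
	struct iter_state st = { 0, 0 };

	while (dump_batch(&st, 2) > 0)	/* two entries per "read()" */
		printf("-- end of batch --\n");
	return 0;
}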
@@ -2201,6 +2295,8 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 		rc = listening_get_next(seq, v);
 		if (!rc) {
 			st->state = TCP_SEQ_STATE_ESTABLISHED;
+			st->bucket = 0;
+			st->offset = 0;
 			rc	  = established_get_first(seq);
 		}
 		break;
@@ -2211,6 +2307,7 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	}
 out:
 	++*pos;
+	st->last_pos = *pos;
 	return rc;
 }

@@ -2249,6 +2346,7 @@ static int tcp_seq_open(struct inode *inode, struct file *file)

 	s = ((struct seq_file *)file->private_data)->private;
 	s->family		= afinfo->family;
+	s->last_pos		= 0;
 	return 0;
 }

@@ -2272,11 +2370,13 @@ int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
 		rc = -ENOMEM;
 	return rc;
 }
+EXPORT_SYMBOL(tcp_proc_register);

 void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
 {
 	proc_net_remove(net, afinfo->name);
 }
+EXPORT_SYMBOL(tcp_proc_unregister);

 static void get_openreq4(struct sock *sk, struct request_sock *req,
 			 struct seq_file *f, int i, int uid, int *len)
@@ -2425,12 +2525,12 @@ static struct tcp_seq_afinfo tcp4_seq_afinfo = {
 	},
 };

-static int tcp4_proc_init_net(struct net *net)
+static int __net_init tcp4_proc_init_net(struct net *net)
 {
 	return tcp_proc_register(net, &tcp4_seq_afinfo);
 }

-static void tcp4_proc_exit_net(struct net *net)
+static void __net_exit tcp4_proc_exit_net(struct net *net)
 {
 	tcp_proc_unregister(net, &tcp4_seq_afinfo);
 }
@@ -2500,6 +2600,8 @@ struct proto tcp_prot = {
 	.setsockopt		= tcp_setsockopt,
 	.getsockopt		= tcp_getsockopt,
 	.recvmsg		= tcp_recvmsg,
+	.sendmsg		= tcp_sendmsg,
+	.sendpage		= tcp_sendpage,
 	.backlog_rcv		= tcp_v4_do_rcv,
 	.hash			= inet_hash,
 	.unhash			= inet_unhash,
@@ -2518,11 +2620,13 @@ struct proto tcp_prot = {
 	.twsk_prot		= &tcp_timewait_sock_ops,
 	.rsk_prot		= &tcp_request_sock_ops,
 	.h.hashinfo		= &tcp_hashinfo,
+	.no_autobind		= true,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt	= compat_tcp_setsockopt,
 	.compat_getsockopt	= compat_tcp_getsockopt,
 #endif
 };
+EXPORT_SYMBOL(tcp_prot);


 static int __net_init tcp_sk_init(struct net *net)
@@ -2553,20 +2657,3 @@ void __init tcp_v4_init(void)
 	if (register_pernet_subsys(&tcp_sk_ops))
 		panic("Failed to create the TCP control socket.\n");
 }
-
-EXPORT_SYMBOL(ipv4_specific);
-EXPORT_SYMBOL(tcp_hashinfo);
-EXPORT_SYMBOL(tcp_prot);
-EXPORT_SYMBOL(tcp_v4_conn_request);
-EXPORT_SYMBOL(tcp_v4_connect);
-EXPORT_SYMBOL(tcp_v4_do_rcv);
-EXPORT_SYMBOL(tcp_v4_remember_stamp);
-EXPORT_SYMBOL(tcp_v4_send_check);
-EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
-
-#ifdef CONFIG_PROC_FS
-EXPORT_SYMBOL(tcp_proc_register);
-EXPORT_SYMBOL(tcp_proc_unregister);
-#endif
-EXPORT_SYMBOL(sysctl_tcp_low_latency);
-