about summary refs log tree commit diff stats
path: root/include/net/tcp.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/net/tcp.h')
-rw-r--r-- include/net/tcp.h 116
1 files changed, 75 insertions, 41 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f75a04d752cb..e79aa48d9fc1 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -123,7 +123,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
123#endif 123#endif
124#define TCP_RTO_MAX ((unsigned)(120*HZ)) 124#define TCP_RTO_MAX ((unsigned)(120*HZ))
125#define TCP_RTO_MIN ((unsigned)(HZ/5)) 125#define TCP_RTO_MIN ((unsigned)(HZ/5))
126#define TCP_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC2988bis initial RTO value */ 126#define TCP_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */
127#define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ)) /* RFC 1122 initial RTO value, now 127#define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ)) /* RFC 1122 initial RTO value, now
128 * used as a fallback RTO for the 128 * used as a fallback RTO for the
129 * initial data transmission if no 129 * initial data transmission if no
@@ -252,6 +252,7 @@ extern int sysctl_tcp_max_ssthresh;
252extern int sysctl_tcp_cookie_size; 252extern int sysctl_tcp_cookie_size;
253extern int sysctl_tcp_thin_linear_timeouts; 253extern int sysctl_tcp_thin_linear_timeouts;
254extern int sysctl_tcp_thin_dupack; 254extern int sysctl_tcp_thin_dupack;
255extern int sysctl_tcp_early_retrans;
255 256
256extern atomic_long_t tcp_memory_allocated; 257extern atomic_long_t tcp_memory_allocated;
257extern struct percpu_counter tcp_sockets_allocated; 258extern struct percpu_counter tcp_sockets_allocated;
@@ -262,14 +263,14 @@ extern int tcp_memory_pressure;
262 * and worry about wraparound (automatic with unsigned arithmetic). 263 * and worry about wraparound (automatic with unsigned arithmetic).
263 */ 264 */
264 265
265static inline int before(__u32 seq1, __u32 seq2) 266static inline bool before(__u32 seq1, __u32 seq2)
266{ 267{
267 return (__s32)(seq1-seq2) < 0; 268 return (__s32)(seq1-seq2) < 0;
268} 269}
269#define after(seq2, seq1) before(seq1, seq2) 270#define after(seq2, seq1) before(seq1, seq2)
270 271
271/* is s2<=s1<=s3 ? */ 272/* is s2<=s1<=s3 ? */
272static inline int between(__u32 seq1, __u32 seq2, __u32 seq3) 273static inline bool between(__u32 seq1, __u32 seq2, __u32 seq3)
273{ 274{
274 return seq3 - seq2 >= seq1 - seq2; 275 return seq3 - seq2 >= seq1 - seq2;
275} 276}
@@ -304,7 +305,7 @@ static inline void tcp_synq_overflow(struct sock *sk)
304} 305}
305 306
306/* syncookies: no recent synqueue overflow on this listening socket? */ 307/* syncookies: no recent synqueue overflow on this listening socket? */
307static inline int tcp_synq_no_recent_overflow(const struct sock *sk) 308static inline bool tcp_synq_no_recent_overflow(const struct sock *sk)
308{ 309{
309 unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; 310 unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
310 return time_after(jiffies, last_overflow + TCP_TIMEOUT_FALLBACK); 311 return time_after(jiffies, last_overflow + TCP_TIMEOUT_FALLBACK);
@@ -366,13 +367,6 @@ static inline void tcp_dec_quickack_mode(struct sock *sk,
366#define TCP_ECN_DEMAND_CWR 4 367#define TCP_ECN_DEMAND_CWR 4
367#define TCP_ECN_SEEN 8 368#define TCP_ECN_SEEN 8
368 369
369static __inline__ void
370TCP_ECN_create_request(struct request_sock *req, struct tcphdr *th)
371{
372 if (sysctl_tcp_ecn && th->ece && th->cwr)
373 inet_rsk(req)->ecn_ok = 1;
374}
375
376enum tcp_tw_status { 370enum tcp_tw_status {
377 TCP_TW_SUCCESS = 0, 371 TCP_TW_SUCCESS = 0,
378 TCP_TW_RST = 1, 372 TCP_TW_RST = 1,
@@ -389,12 +383,13 @@ extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb,
389 struct request_sock **prev); 383 struct request_sock **prev);
390extern int tcp_child_process(struct sock *parent, struct sock *child, 384extern int tcp_child_process(struct sock *parent, struct sock *child,
391 struct sk_buff *skb); 385 struct sk_buff *skb);
392extern int tcp_use_frto(struct sock *sk); 386extern bool tcp_use_frto(struct sock *sk);
393extern void tcp_enter_frto(struct sock *sk); 387extern void tcp_enter_frto(struct sock *sk);
394extern void tcp_enter_loss(struct sock *sk, int how); 388extern void tcp_enter_loss(struct sock *sk, int how);
395extern void tcp_clear_retrans(struct tcp_sock *tp); 389extern void tcp_clear_retrans(struct tcp_sock *tp);
396extern void tcp_update_metrics(struct sock *sk); 390extern void tcp_update_metrics(struct sock *sk);
397extern void tcp_close(struct sock *sk, long timeout); 391extern void tcp_close(struct sock *sk, long timeout);
392extern void tcp_init_sock(struct sock *sk);
398extern unsigned int tcp_poll(struct file * file, struct socket *sock, 393extern unsigned int tcp_poll(struct file * file, struct socket *sock,
399 struct poll_table_struct *wait); 394 struct poll_table_struct *wait);
400extern int tcp_getsockopt(struct sock *sk, int level, int optname, 395extern int tcp_getsockopt(struct sock *sk, int level, int optname,
@@ -435,6 +430,9 @@ extern struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
435 struct request_values *rvp); 430 struct request_values *rvp);
436extern int tcp_disconnect(struct sock *sk, int flags); 431extern int tcp_disconnect(struct sock *sk, int flags);
437 432
433void tcp_connect_init(struct sock *sk);
434void tcp_finish_connect(struct sock *sk, struct sk_buff *skb);
435int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size);
438 436
439/* From syncookies.c */ 437/* From syncookies.c */
440extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS]; 438extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
@@ -472,7 +470,7 @@ static inline __u32 cookie_v6_init_sequence(struct sock *sk,
472 470
473extern void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, 471extern void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
474 int nonagle); 472 int nonagle);
475extern int tcp_may_send_now(struct sock *sk); 473extern bool tcp_may_send_now(struct sock *sk);
476extern int tcp_retransmit_skb(struct sock *, struct sk_buff *); 474extern int tcp_retransmit_skb(struct sock *, struct sk_buff *);
477extern void tcp_retransmit_timer(struct sock *sk); 475extern void tcp_retransmit_timer(struct sock *sk);
478extern void tcp_xmit_retransmit_queue(struct sock *); 476extern void tcp_xmit_retransmit_queue(struct sock *);
@@ -486,15 +484,17 @@ extern int tcp_write_wakeup(struct sock *);
486extern void tcp_send_fin(struct sock *sk); 484extern void tcp_send_fin(struct sock *sk);
487extern void tcp_send_active_reset(struct sock *sk, gfp_t priority); 485extern void tcp_send_active_reset(struct sock *sk, gfp_t priority);
488extern int tcp_send_synack(struct sock *); 486extern int tcp_send_synack(struct sock *);
489extern int tcp_syn_flood_action(struct sock *sk, 487extern bool tcp_syn_flood_action(struct sock *sk,
490 const struct sk_buff *skb, 488 const struct sk_buff *skb,
491 const char *proto); 489 const char *proto);
492extern void tcp_push_one(struct sock *, unsigned int mss_now); 490extern void tcp_push_one(struct sock *, unsigned int mss_now);
493extern void tcp_send_ack(struct sock *sk); 491extern void tcp_send_ack(struct sock *sk);
494extern void tcp_send_delayed_ack(struct sock *sk); 492extern void tcp_send_delayed_ack(struct sock *sk);
495 493
496/* tcp_input.c */ 494/* tcp_input.c */
497extern void tcp_cwnd_application_limited(struct sock *sk); 495extern void tcp_cwnd_application_limited(struct sock *sk);
496extern void tcp_resume_early_retransmit(struct sock *sk);
497extern void tcp_rearm_rto(struct sock *sk);
498 498
499/* tcp_timer.c */ 499/* tcp_timer.c */
500extern void tcp_init_xmit_timers(struct sock *); 500extern void tcp_init_xmit_timers(struct sock *);
@@ -540,8 +540,8 @@ extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
540 540
541extern void tcp_initialize_rcv_mss(struct sock *sk); 541extern void tcp_initialize_rcv_mss(struct sock *sk);
542 542
543extern int tcp_mtu_to_mss(const struct sock *sk, int pmtu); 543extern int tcp_mtu_to_mss(struct sock *sk, int pmtu);
544extern int tcp_mss_to_mtu(const struct sock *sk, int mss); 544extern int tcp_mss_to_mtu(struct sock *sk, int mss);
545extern void tcp_mtup_init(struct sock *sk); 545extern void tcp_mtup_init(struct sock *sk);
546extern void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt); 546extern void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt);
547 547
@@ -609,6 +609,8 @@ static inline u32 tcp_receive_window(const struct tcp_sock *tp)
609 */ 609 */
610extern u32 __tcp_select_window(struct sock *sk); 610extern u32 __tcp_select_window(struct sock *sk);
611 611
612void tcp_send_window_probe(struct sock *sk);
613
612/* TCP timestamps are only 32-bits, this causes a slight 614/* TCP timestamps are only 32-bits, this causes a slight
613 * complication on 64-bit systems since we store a snapshot 615 * complication on 64-bit systems since we store a snapshot
614 * of jiffies in the buffer control blocks below. We decided 616 * of jiffies in the buffer control blocks below. We decided
@@ -645,21 +647,38 @@ struct tcp_skb_cb {
645 __u32 end_seq; /* SEQ + FIN + SYN + datalen */ 647 __u32 end_seq; /* SEQ + FIN + SYN + datalen */
646 __u32 when; /* used to compute rtt's */ 648 __u32 when; /* used to compute rtt's */
647 __u8 tcp_flags; /* TCP header flags. (tcp[13]) */ 649 __u8 tcp_flags; /* TCP header flags. (tcp[13]) */
650
648 __u8 sacked; /* State flags for SACK/FACK. */ 651 __u8 sacked; /* State flags for SACK/FACK. */
649#define TCPCB_SACKED_ACKED 0x01 /* SKB ACK'd by a SACK block */ 652#define TCPCB_SACKED_ACKED 0x01 /* SKB ACK'd by a SACK block */
650#define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */ 653#define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */
651#define TCPCB_LOST 0x04 /* SKB is lost */ 654#define TCPCB_LOST 0x04 /* SKB is lost */
652#define TCPCB_TAGBITS 0x07 /* All tag bits */ 655#define TCPCB_TAGBITS 0x07 /* All tag bits */
653 __u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */
654 /* 1 byte hole */
655#define TCPCB_EVER_RETRANS 0x80 /* Ever retransmitted frame */ 656#define TCPCB_EVER_RETRANS 0x80 /* Ever retransmitted frame */
656#define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS) 657#define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS)
657 658
659 __u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */
660 /* 1 byte hole */
658 __u32 ack_seq; /* Sequence number ACK'd */ 661 __u32 ack_seq; /* Sequence number ACK'd */
659}; 662};
660 663
661#define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0])) 664#define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0]))
662 665
666/* RFC3168 : 6.1.1 SYN packets must not have ECT/ECN bits set
667 *
668 * If we receive a SYN packet with these bits set, it means a network is
669 * playing bad games with TOS bits. In order to avoid possible false congestion
670 * notifications, we disable TCP ECN negotiation.
671 */
672static inline void
673TCP_ECN_create_request(struct request_sock *req, const struct sk_buff *skb)
674{
675 const struct tcphdr *th = tcp_hdr(skb);
676
677 if (sysctl_tcp_ecn && th->ece && th->cwr &&
678 INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield))
679 inet_rsk(req)->ecn_ok = 1;
680}
681
663/* Due to TSO, an SKB can be composed of multiple actual 682/* Due to TSO, an SKB can be composed of multiple actual
664 * packets. To keep these tracked properly, we use this. 683 * packets. To keep these tracked properly, we use this.
665 */ 684 */
@@ -775,12 +794,12 @@ static inline int tcp_is_sack(const struct tcp_sock *tp)
775 return tp->rx_opt.sack_ok; 794 return tp->rx_opt.sack_ok;
776} 795}
777 796
778static inline int tcp_is_reno(const struct tcp_sock *tp) 797static inline bool tcp_is_reno(const struct tcp_sock *tp)
779{ 798{
780 return !tcp_is_sack(tp); 799 return !tcp_is_sack(tp);
781} 800}
782 801
783static inline int tcp_is_fack(const struct tcp_sock *tp) 802static inline bool tcp_is_fack(const struct tcp_sock *tp)
784{ 803{
785 return tp->rx_opt.sack_ok & TCP_FACK_ENABLED; 804 return tp->rx_opt.sack_ok & TCP_FACK_ENABLED;
786} 805}
@@ -790,6 +809,21 @@ static inline void tcp_enable_fack(struct tcp_sock *tp)
790 tp->rx_opt.sack_ok |= TCP_FACK_ENABLED; 809 tp->rx_opt.sack_ok |= TCP_FACK_ENABLED;
791} 810}
792 811
812/* TCP early-retransmit (ER) is similar to but more conservative than
813 * the thin-dupack feature. Enable ER only if thin-dupack is disabled.
814 */
815static inline void tcp_enable_early_retrans(struct tcp_sock *tp)
816{
817 tp->do_early_retrans = sysctl_tcp_early_retrans &&
818 !sysctl_tcp_thin_dupack && sysctl_tcp_reordering == 3;
819 tp->early_retrans_delayed = 0;
820}
821
822static inline void tcp_disable_early_retrans(struct tcp_sock *tp)
823{
824 tp->do_early_retrans = 0;
825}
826
793static inline unsigned int tcp_left_out(const struct tcp_sock *tp) 827static inline unsigned int tcp_left_out(const struct tcp_sock *tp)
794{ 828{
795 return tp->sacked_out + tp->lost_out; 829 return tp->sacked_out + tp->lost_out;
@@ -867,7 +901,7 @@ static inline u32 tcp_wnd_end(const struct tcp_sock *tp)
867{ 901{
868 return tp->snd_una + tp->snd_wnd; 902 return tp->snd_una + tp->snd_wnd;
869} 903}
870extern int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight); 904extern bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight);
871 905
872static inline void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss, 906static inline void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss,
873 const struct sk_buff *skb) 907 const struct sk_buff *skb)
@@ -910,7 +944,7 @@ static inline __sum16 __tcp_checksum_complete(struct sk_buff *skb)
910 return __skb_checksum_complete(skb); 944 return __skb_checksum_complete(skb);
911} 945}
912 946
913static inline int tcp_checksum_complete(struct sk_buff *skb) 947static inline bool tcp_checksum_complete(struct sk_buff *skb)
914{ 948{
915 return !skb_csum_unnecessary(skb) && 949 return !skb_csum_unnecessary(skb) &&
916 __tcp_checksum_complete(skb); 950 __tcp_checksum_complete(skb);
@@ -940,12 +974,12 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp)
940 * 974 *
941 * NOTE: is this not too big to inline? 975 * NOTE: is this not too big to inline?
942 */ 976 */
943static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb) 977static inline bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
944{ 978{
945 struct tcp_sock *tp = tcp_sk(sk); 979 struct tcp_sock *tp = tcp_sk(sk);
946 980
947 if (sysctl_tcp_low_latency || !tp->ucopy.task) 981 if (sysctl_tcp_low_latency || !tp->ucopy.task)
948 return 0; 982 return false;
949 983
950 __skb_queue_tail(&tp->ucopy.prequeue, skb); 984 __skb_queue_tail(&tp->ucopy.prequeue, skb);
951 tp->ucopy.memory += skb->truesize; 985 tp->ucopy.memory += skb->truesize;
@@ -969,7 +1003,7 @@ static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
969 (3 * tcp_rto_min(sk)) / 4, 1003 (3 * tcp_rto_min(sk)) / 4,
970 TCP_RTO_MAX); 1004 TCP_RTO_MAX);
971 } 1005 }
972 return 1; 1006 return true;
973} 1007}
974 1008
975 1009
@@ -1074,28 +1108,28 @@ static inline int tcp_fin_time(const struct sock *sk)
1074 return fin_timeout; 1108 return fin_timeout;
1075} 1109}
1076 1110
1077static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, 1111static inline bool tcp_paws_check(const struct tcp_options_received *rx_opt,
1078 int paws_win) 1112 int paws_win)
1079{ 1113{
1080 if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win) 1114 if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win)
1081 return 1; 1115 return true;
1082 if (unlikely(get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)) 1116 if (unlikely(get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS))
1083 return 1; 1117 return true;
1084 /* 1118 /*
1085 * Some OSes send SYN and SYNACK messages with tsval=0 tsecr=0, 1119 * Some OSes send SYN and SYNACK messages with tsval=0 tsecr=0,
1086 * then following tcp messages have valid values. Ignore 0 value, 1120 * then following tcp messages have valid values. Ignore 0 value,
1087 * or else 'negative' tsval might forbid us to accept their packets. 1121 * or else 'negative' tsval might forbid us to accept their packets.
1088 */ 1122 */
1089 if (!rx_opt->ts_recent) 1123 if (!rx_opt->ts_recent)
1090 return 1; 1124 return true;
1091 return 0; 1125 return false;
1092} 1126}
1093 1127
1094static inline int tcp_paws_reject(const struct tcp_options_received *rx_opt, 1128static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt,
1095 int rst) 1129 int rst)
1096{ 1130{
1097 if (tcp_paws_check(rx_opt, 0)) 1131 if (tcp_paws_check(rx_opt, 0))
1098 return 0; 1132 return false;
1099 1133
1100 /* RST segments are not recommended to carry timestamp, 1134 /* RST segments are not recommended to carry timestamp,
1101 and, if they do, it is recommended to ignore PAWS because 1135 and, if they do, it is recommended to ignore PAWS because
@@ -1110,8 +1144,8 @@ static inline int tcp_paws_reject(const struct tcp_options_received *rx_opt,
1110 However, we can relax time bounds for RST segments to MSL. 1144 However, we can relax time bounds for RST segments to MSL.
1111 */ 1145 */
1112 if (rst && get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL) 1146 if (rst && get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL)
1113 return 0; 1147 return false;
1114 return 1; 1148 return true;
1115} 1149}
1116 1150
1117static inline void tcp_mib_init(struct net *net) 1151static inline void tcp_mib_init(struct net *net)
@@ -1226,7 +1260,7 @@ extern void tcp_put_md5sig_pool(void);
1226 1260
1227extern int tcp_md5_hash_header(struct tcp_md5sig_pool *, const struct tcphdr *); 1261extern int tcp_md5_hash_header(struct tcp_md5sig_pool *, const struct tcphdr *);
1228extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff *, 1262extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff *,
1229 unsigned header_len); 1263 unsigned int header_len);
1230extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, 1264extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
1231 const struct tcp_md5sig_key *key); 1265 const struct tcp_md5sig_key *key);
1232 1266
@@ -1349,7 +1383,7 @@ static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk)
1349 __skb_unlink(skb, &sk->sk_write_queue); 1383 __skb_unlink(skb, &sk->sk_write_queue);
1350} 1384}
1351 1385
1352static inline int tcp_write_queue_empty(struct sock *sk) 1386static inline bool tcp_write_queue_empty(struct sock *sk)
1353{ 1387{
1354 return skb_queue_empty(&sk->sk_write_queue); 1388 return skb_queue_empty(&sk->sk_write_queue);
1355} 1389}
@@ -1406,7 +1440,7 @@ static inline void tcp_highest_sack_combine(struct sock *sk,
1406/* Determines whether this is a thin stream (which may suffer from 1440/* Determines whether this is a thin stream (which may suffer from
1407 * increased latency). Used to trigger latency-reducing mechanisms. 1441 * increased latency). Used to trigger latency-reducing mechanisms.
1408 */ 1442 */
1409static inline unsigned int tcp_stream_is_thin(struct tcp_sock *tp) 1443static inline bool tcp_stream_is_thin(struct tcp_sock *tp)
1410{ 1444{
1411 return tp->packets_out < 4 && !tcp_in_initial_slowstart(tp); 1445 return tp->packets_out < 4 && !tcp_in_initial_slowstart(tp);
1412} 1446}