about summary refs log tree commit diff stats
path: root/include/net/tcp.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/net/tcp.h')
-rw-r--r-- include/net/tcp.h 190
1 files changed, 128 insertions, 62 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 03a49c703377..aa04b9a5093b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -30,6 +30,7 @@
30#include <linux/dmaengine.h> 30#include <linux/dmaengine.h>
31#include <linux/crypto.h> 31#include <linux/crypto.h>
32#include <linux/cryptohash.h> 32#include <linux/cryptohash.h>
33#include <linux/kref.h>
33 34
34#include <net/inet_connection_sock.h> 35#include <net/inet_connection_sock.h>
35#include <net/inet_timewait_sock.h> 36#include <net/inet_timewait_sock.h>
@@ -62,9 +63,6 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
62/* Minimal accepted MSS. It is (60+60+8) - (20+20). */ 63/* Minimal accepted MSS. It is (60+60+8) - (20+20). */
63#define TCP_MIN_MSS 88U 64#define TCP_MIN_MSS 88U
64 65
65/* Minimal RCV_MSS. */
66#define TCP_MIN_RCVMSS 536U
67
68/* The least MTU to use for probing */ 66/* The least MTU to use for probing */
69#define TCP_BASE_MSS 512 67#define TCP_BASE_MSS 512
70 68
@@ -167,6 +165,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
167#define TCPOPT_SACK 5 /* SACK Block */ 165#define TCPOPT_SACK 5 /* SACK Block */
168#define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ 166#define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */
169#define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */ 167#define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */
168#define TCPOPT_COOKIE 253 /* Cookie extension (experimental) */
170 169
171/* 170/*
172 * TCP option lengths 171 * TCP option lengths
@@ -177,6 +176,10 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
177#define TCPOLEN_SACK_PERM 2 176#define TCPOLEN_SACK_PERM 2
178#define TCPOLEN_TIMESTAMP 10 177#define TCPOLEN_TIMESTAMP 10
179#define TCPOLEN_MD5SIG 18 178#define TCPOLEN_MD5SIG 18
179#define TCPOLEN_COOKIE_BASE 2 /* Cookie-less header extension */
180#define TCPOLEN_COOKIE_PAIR 3 /* Cookie pair header extension */
181#define TCPOLEN_COOKIE_MIN (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MIN)
182#define TCPOLEN_COOKIE_MAX (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MAX)
180 183
181/* But this is what stacks really send out. */ 184/* But this is what stacks really send out. */
182#define TCPOLEN_TSTAMP_ALIGNED 12 185#define TCPOLEN_TSTAMP_ALIGNED 12
@@ -193,6 +196,9 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
193#define TCP_NAGLE_CORK 2 /* Socket is corked */ 196#define TCP_NAGLE_CORK 2 /* Socket is corked */
194#define TCP_NAGLE_PUSH 4 /* Cork is overridden for already queued data */ 197#define TCP_NAGLE_PUSH 4 /* Cork is overridden for already queued data */
195 198
199/* TCP thin-stream limits */
200#define TCP_THIN_LINEAR_RETRIES 6 /* After 6 linear retries, do exp. backoff */
201
196extern struct inet_timewait_death_row tcp_death_row; 202extern struct inet_timewait_death_row tcp_death_row;
197 203
198/* sysctl variables for tcp */ 204/* sysctl variables for tcp */
@@ -237,6 +243,9 @@ extern int sysctl_tcp_base_mss;
237extern int sysctl_tcp_workaround_signed_windows; 243extern int sysctl_tcp_workaround_signed_windows;
238extern int sysctl_tcp_slow_start_after_idle; 244extern int sysctl_tcp_slow_start_after_idle;
239extern int sysctl_tcp_max_ssthresh; 245extern int sysctl_tcp_max_ssthresh;
246extern int sysctl_tcp_cookie_size;
247extern int sysctl_tcp_thin_linear_timeouts;
248extern int sysctl_tcp_thin_dupack;
240 249
241extern atomic_t tcp_memory_allocated; 250extern atomic_t tcp_memory_allocated;
242extern struct percpu_counter tcp_sockets_allocated; 251extern struct percpu_counter tcp_sockets_allocated;
@@ -343,11 +352,6 @@ static inline void tcp_dec_quickack_mode(struct sock *sk,
343 352
344extern void tcp_enter_quickack_mode(struct sock *sk); 353extern void tcp_enter_quickack_mode(struct sock *sk);
345 354
346static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
347{
348 rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
349}
350
351#define TCP_ECN_OK 1 355#define TCP_ECN_OK 1
352#define TCP_ECN_QUEUE_CWR 2 356#define TCP_ECN_QUEUE_CWR 2
353#define TCP_ECN_DEMAND_CWR 4 357#define TCP_ECN_DEMAND_CWR 4
@@ -359,8 +363,7 @@ TCP_ECN_create_request(struct request_sock *req, struct tcphdr *th)
359 inet_rsk(req)->ecn_ok = 1; 363 inet_rsk(req)->ecn_ok = 1;
360} 364}
361 365
362enum tcp_tw_status 366enum tcp_tw_status {
363{
364 TCP_TW_SUCCESS = 0, 367 TCP_TW_SUCCESS = 0,
365 TCP_TW_RST = 1, 368 TCP_TW_RST = 1,
366 TCP_TW_ACK = 2, 369 TCP_TW_ACK = 2,
@@ -402,6 +405,8 @@ extern int compat_tcp_setsockopt(struct sock *sk,
402 int level, int optname, 405 int level, int optname,
403 char __user *optval, unsigned int optlen); 406 char __user *optval, unsigned int optlen);
404extern void tcp_set_keepalive(struct sock *sk, int val); 407extern void tcp_set_keepalive(struct sock *sk, int val);
408extern void tcp_syn_ack_timeout(struct sock *sk,
409 struct request_sock *req);
405extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, 410extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk,
406 struct msghdr *msg, 411 struct msghdr *msg,
407 size_t len, int nonblock, 412 size_t len, int nonblock,
@@ -409,6 +414,7 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk,
409 414
410extern void tcp_parse_options(struct sk_buff *skb, 415extern void tcp_parse_options(struct sk_buff *skb,
411 struct tcp_options_received *opt_rx, 416 struct tcp_options_received *opt_rx,
417 u8 **hvpp,
412 int estab); 418 int estab);
413 419
414extern u8 *tcp_parse_md5sig_option(struct tcphdr *th); 420extern u8 *tcp_parse_md5sig_option(struct tcphdr *th);
@@ -443,7 +449,8 @@ extern int tcp_connect(struct sock *sk);
443 449
444extern struct sk_buff * tcp_make_synack(struct sock *sk, 450extern struct sk_buff * tcp_make_synack(struct sock *sk,
445 struct dst_entry *dst, 451 struct dst_entry *dst,
446 struct request_sock *req); 452 struct request_sock *req,
453 struct request_values *rvp);
447 454
448extern int tcp_disconnect(struct sock *sk, int flags); 455extern int tcp_disconnect(struct sock *sk, int flags);
449 456
@@ -856,13 +863,6 @@ static inline void tcp_check_probe_timer(struct sock *sk)
856 icsk->icsk_rto, TCP_RTO_MAX); 863 icsk->icsk_rto, TCP_RTO_MAX);
857} 864}
858 865
859static inline void tcp_push_pending_frames(struct sock *sk)
860{
861 struct tcp_sock *tp = tcp_sk(sk);
862
863 __tcp_push_pending_frames(sk, tcp_current_mss(sk), tp->nonagle);
864}
865
866static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq) 866static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq)
867{ 867{
868 tp->snd_wl1 = seq; 868 tp->snd_wl1 = seq;
@@ -939,7 +939,7 @@ static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
939 939
940 tp->ucopy.memory = 0; 940 tp->ucopy.memory = 0;
941 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { 941 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
942 wake_up_interruptible_poll(sk->sk_sleep, 942 wake_up_interruptible_sync_poll(sk->sk_sleep,
943 POLLIN | POLLRDNORM | POLLRDBAND); 943 POLLIN | POLLRDNORM | POLLRDBAND);
944 if (!inet_csk_ack_scheduled(sk)) 944 if (!inet_csk_ack_scheduled(sk))
945 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 945 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
@@ -972,7 +972,8 @@ static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
972/* Determine a window scaling and initial window to offer. */ 972/* Determine a window scaling and initial window to offer. */
973extern void tcp_select_initial_window(int __space, __u32 mss, 973extern void tcp_select_initial_window(int __space, __u32 mss,
974 __u32 *rcv_wnd, __u32 *window_clamp, 974 __u32 *rcv_wnd, __u32 *window_clamp,
975 int wscale_ok, __u8 *rcv_wscale); 975 int wscale_ok, __u8 *rcv_wscale,
976 __u32 init_rcv_wnd);
976 977
977static inline int tcp_win_from_space(int space) 978static inline int tcp_win_from_space(int space)
978{ 979{
@@ -1193,33 +1194,18 @@ extern int tcp_v4_md5_do_del(struct sock *sk,
1193#define tcp_twsk_md5_key(twsk) NULL 1194#define tcp_twsk_md5_key(twsk) NULL
1194#endif 1195#endif
1195 1196
1196extern struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(struct sock *); 1197extern struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *);
1197extern void tcp_free_md5sig_pool(void); 1198extern void tcp_free_md5sig_pool(void);
1198 1199
1199extern struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu); 1200extern struct tcp_md5sig_pool *tcp_get_md5sig_pool(void);
1200extern void __tcp_put_md5sig_pool(void); 1201extern void tcp_put_md5sig_pool(void);
1202
1201extern int tcp_md5_hash_header(struct tcp_md5sig_pool *, struct tcphdr *); 1203extern int tcp_md5_hash_header(struct tcp_md5sig_pool *, struct tcphdr *);
1202extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, struct sk_buff *, 1204extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, struct sk_buff *,
1203 unsigned header_len); 1205 unsigned header_len);
1204extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, 1206extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
1205 struct tcp_md5sig_key *key); 1207 struct tcp_md5sig_key *key);
1206 1208
1207static inline
1208struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
1209{
1210 int cpu = get_cpu();
1211 struct tcp_md5sig_pool *ret = __tcp_get_md5sig_pool(cpu);
1212 if (!ret)
1213 put_cpu();
1214 return ret;
1215}
1216
1217static inline void tcp_put_md5sig_pool(void)
1218{
1219 __tcp_put_md5sig_pool();
1220 put_cpu();
1221}
1222
1223/* write queue abstraction */ 1209/* write queue abstraction */
1224static inline void tcp_write_queue_purge(struct sock *sk) 1210static inline void tcp_write_queue_purge(struct sock *sk)
1225{ 1211{
@@ -1228,6 +1214,7 @@ static inline void tcp_write_queue_purge(struct sock *sk)
1228 while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) 1214 while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL)
1229 sk_wmem_free_skb(sk, skb); 1215 sk_wmem_free_skb(sk, skb);
1230 sk_mem_reclaim(sk); 1216 sk_mem_reclaim(sk);
1217 tcp_clear_all_retrans_hints(tcp_sk(sk));
1231} 1218}
1232 1219
1233static inline struct sk_buff *tcp_write_queue_head(struct sock *sk) 1220static inline struct sk_buff *tcp_write_queue_head(struct sock *sk)
@@ -1259,29 +1246,6 @@ static inline struct sk_buff *tcp_write_queue_prev(struct sock *sk, struct sk_bu
1259#define tcp_for_write_queue_from_safe(skb, tmp, sk) \ 1246#define tcp_for_write_queue_from_safe(skb, tmp, sk) \
1260 skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp) 1247 skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp)
1261 1248
1262/* This function calculates a "timeout" which is equivalent to the timeout of a
1263 * TCP connection after "boundary" unsuccessful, exponentially backed-off
1264 * retransmissions with an initial RTO of TCP_RTO_MIN.
1265 */
1266static inline bool retransmits_timed_out(const struct sock *sk,
1267 unsigned int boundary)
1268{
1269 unsigned int timeout, linear_backoff_thresh;
1270
1271 if (!inet_csk(sk)->icsk_retransmits)
1272 return false;
1273
1274 linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN);
1275
1276 if (boundary <= linear_backoff_thresh)
1277 timeout = ((2 << boundary) - 1) * TCP_RTO_MIN;
1278 else
1279 timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN +
1280 (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
1281
1282 return (tcp_time_stamp - tcp_sk(sk)->retrans_stamp) >= timeout;
1283}
1284
1285static inline struct sk_buff *tcp_send_head(struct sock *sk) 1249static inline struct sk_buff *tcp_send_head(struct sock *sk)
1286{ 1250{
1287 return sk->sk_send_head; 1251 return sk->sk_send_head;
@@ -1364,6 +1328,15 @@ static inline int tcp_write_queue_empty(struct sock *sk)
1364 return skb_queue_empty(&sk->sk_write_queue); 1328 return skb_queue_empty(&sk->sk_write_queue);
1365} 1329}
1366 1330
1331static inline void tcp_push_pending_frames(struct sock *sk)
1332{
1333 if (tcp_send_head(sk)) {
1334 struct tcp_sock *tp = tcp_sk(sk);
1335
1336 __tcp_push_pending_frames(sk, tcp_current_mss(sk), tp->nonagle);
1337 }
1338}
1339
1367/* Start sequence of the highest skb with SACKed bit, valid only if 1340/* Start sequence of the highest skb with SACKed bit, valid only if
1368 * sacked > 0 or when the caller has ensured validity by itself. 1341 * sacked > 0 or when the caller has ensured validity by itself.
1369 */ 1342 */
@@ -1403,6 +1376,14 @@ static inline void tcp_highest_sack_combine(struct sock *sk,
1403 tcp_sk(sk)->highest_sack = new; 1376 tcp_sk(sk)->highest_sack = new;
1404} 1377}
1405 1378
1379/* Determines whether this is a thin stream (which may suffer from
1380 * increased latency). Used to trigger latency-reducing mechanisms.
1381 */
1382static inline unsigned int tcp_stream_is_thin(struct tcp_sock *tp)
1383{
1384 return tp->packets_out < 4 && !tcp_in_initial_slowstart(tp);
1385}
1386
1406/* /proc */ 1387/* /proc */
1407enum tcp_seq_states { 1388enum tcp_seq_states {
1408 TCP_SEQ_STATE_LISTENING, 1389 TCP_SEQ_STATE_LISTENING,
@@ -1480,6 +1461,91 @@ struct tcp_request_sock_ops {
1480#endif 1461#endif
1481}; 1462};
1482 1463
1464/* Using SHA1 for now, define some constants.
1465 */
1466#define COOKIE_DIGEST_WORDS (SHA_DIGEST_WORDS)
1467#define COOKIE_MESSAGE_WORDS (SHA_MESSAGE_BYTES / 4)
1468#define COOKIE_WORKSPACE_WORDS (COOKIE_DIGEST_WORDS + COOKIE_MESSAGE_WORDS)
1469
1470extern int tcp_cookie_generator(u32 *bakery);
1471
1472/**
1473 * struct tcp_cookie_values - each socket needs extra space for the
1474 * cookies, together with (optional) space for any SYN data.
1475 *
1476 * A tcp_sock contains a pointer to the current value, and this is
1477 * cloned to the tcp_timewait_sock.
1478 *
1479 * @cookie_pair: variable data from the option exchange.
1480 *
1481 * @cookie_desired: user-specified tcpct_cookie_desired. Zero
1482 * indicates the default (sysctl_tcp_cookie_size).
1483 * After the cookie is sent, remembers the size of the cookie.
1484 * Range: 0, or TCP_COOKIE_MIN to TCP_COOKIE_MAX.
1485 *
1486 * @s_data_desired: user-specified tcpct_s_data_desired. When the
1487 * constant payload is specified (@s_data_constant),
1488 * holds its length instead.
1489 * Range 0 to TCP_MSS_DESIRED.
1490 *
1491 * @s_data_payload: constant data that is to be included in the
1492 * payload of SYN or SYNACK segments when the
1493 * cookie option is present.
1494 */
1495struct tcp_cookie_values {
1496 struct kref kref;
1497 u8 cookie_pair[TCP_COOKIE_PAIR_SIZE];
1498 u8 cookie_pair_size;
1499 u8 cookie_desired;
1500 u16 s_data_desired:11,
1501 s_data_constant:1,
1502 s_data_in:1,
1503 s_data_out:1,
1504 s_data_unused:2;
1505 u8 s_data_payload[0];
1506};
1507
1508static inline void tcp_cookie_values_release(struct kref *kref)
1509{
1510 kfree(container_of(kref, struct tcp_cookie_values, kref));
1511}
1512
1513/* The length of constant payload data. Note that s_data_desired is
1514 * overloaded, depending on s_data_constant: either the length of constant
1515 * data (returned here) or the limit on variable data.
1516 */
1517static inline int tcp_s_data_size(const struct tcp_sock *tp)
1518{
1519 return (tp->cookie_values != NULL && tp->cookie_values->s_data_constant)
1520 ? tp->cookie_values->s_data_desired
1521 : 0;
1522}
1523
1524/**
1525 * struct tcp_extend_values - tcp_ipv?.c to tcp_output.c workspace.
1526 *
1527 * As tcp_request_sock has already been extended in other places, the
1528 * only remaining method is to pass stack values along as function
1529 * parameters. These parameters are not needed after sending SYNACK.
1530 *
1531 * @cookie_bakery: cryptographic secret and message workspace.
1532 *
1533 * @cookie_plus: bytes in authenticator/cookie option, copied from
1534 * struct tcp_options_received (above).
1535 */
1536struct tcp_extend_values {
1537 struct request_values rv;
1538 u32 cookie_bakery[COOKIE_WORKSPACE_WORDS];
1539 u8 cookie_plus:6,
1540 cookie_out_never:1,
1541 cookie_in_always:1;
1542};
1543
1544static inline struct tcp_extend_values *tcp_xv(struct request_values *rvp)
1545{
1546 return (struct tcp_extend_values *)rvp;
1547}
1548
1483extern void tcp_v4_init(void); 1549extern void tcp_v4_init(void);
1484extern void tcp_init(void); 1550extern void tcp_init(void);
1485 1551