diff options
Diffstat (limited to 'include/net/tcp.h')
-rw-r--r-- | include/net/tcp.h | 190 |
1 files changed, 128 insertions, 62 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 03a49c703377..aa04b9a5093b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/dmaengine.h> | 30 | #include <linux/dmaengine.h> |
31 | #include <linux/crypto.h> | 31 | #include <linux/crypto.h> |
32 | #include <linux/cryptohash.h> | 32 | #include <linux/cryptohash.h> |
33 | #include <linux/kref.h> | ||
33 | 34 | ||
34 | #include <net/inet_connection_sock.h> | 35 | #include <net/inet_connection_sock.h> |
35 | #include <net/inet_timewait_sock.h> | 36 | #include <net/inet_timewait_sock.h> |
@@ -62,9 +63,6 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); | |||
62 | /* Minimal accepted MSS. It is (60+60+8) - (20+20). */ | 63 | /* Minimal accepted MSS. It is (60+60+8) - (20+20). */ |
63 | #define TCP_MIN_MSS 88U | 64 | #define TCP_MIN_MSS 88U |
64 | 65 | ||
65 | /* Minimal RCV_MSS. */ | ||
66 | #define TCP_MIN_RCVMSS 536U | ||
67 | |||
68 | /* The least MTU to use for probing */ | 66 | /* The least MTU to use for probing */ |
69 | #define TCP_BASE_MSS 512 | 67 | #define TCP_BASE_MSS 512 |
70 | 68 | ||
@@ -167,6 +165,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); | |||
167 | #define TCPOPT_SACK 5 /* SACK Block */ | 165 | #define TCPOPT_SACK 5 /* SACK Block */ |
168 | #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ | 166 | #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ |
169 | #define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */ | 167 | #define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */ |
168 | #define TCPOPT_COOKIE 253 /* Cookie extension (experimental) */ | ||
170 | 169 | ||
171 | /* | 170 | /* |
172 | * TCP option lengths | 171 | * TCP option lengths |
@@ -177,6 +176,10 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); | |||
177 | #define TCPOLEN_SACK_PERM 2 | 176 | #define TCPOLEN_SACK_PERM 2 |
178 | #define TCPOLEN_TIMESTAMP 10 | 177 | #define TCPOLEN_TIMESTAMP 10 |
179 | #define TCPOLEN_MD5SIG 18 | 178 | #define TCPOLEN_MD5SIG 18 |
179 | #define TCPOLEN_COOKIE_BASE 2 /* Cookie-less header extension */ | ||
180 | #define TCPOLEN_COOKIE_PAIR 3 /* Cookie pair header extension */ | ||
181 | #define TCPOLEN_COOKIE_MIN (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MIN) | ||
182 | #define TCPOLEN_COOKIE_MAX (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MAX) | ||
180 | 183 | ||
181 | /* But this is what stacks really send out. */ | 184 | /* But this is what stacks really send out. */ |
182 | #define TCPOLEN_TSTAMP_ALIGNED 12 | 185 | #define TCPOLEN_TSTAMP_ALIGNED 12 |
@@ -193,6 +196,9 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); | |||
193 | #define TCP_NAGLE_CORK 2 /* Socket is corked */ | 196 | #define TCP_NAGLE_CORK 2 /* Socket is corked */ |
194 | #define TCP_NAGLE_PUSH 4 /* Cork is overridden for already queued data */ | 197 | #define TCP_NAGLE_PUSH 4 /* Cork is overridden for already queued data */ |
195 | 198 | ||
199 | /* TCP thin-stream limits */ | ||
200 | #define TCP_THIN_LINEAR_RETRIES 6 /* After 6 linear retries, do exp. backoff */ | ||
201 | |||
196 | extern struct inet_timewait_death_row tcp_death_row; | 202 | extern struct inet_timewait_death_row tcp_death_row; |
197 | 203 | ||
198 | /* sysctl variables for tcp */ | 204 | /* sysctl variables for tcp */ |
@@ -237,6 +243,9 @@ extern int sysctl_tcp_base_mss; | |||
237 | extern int sysctl_tcp_workaround_signed_windows; | 243 | extern int sysctl_tcp_workaround_signed_windows; |
238 | extern int sysctl_tcp_slow_start_after_idle; | 244 | extern int sysctl_tcp_slow_start_after_idle; |
239 | extern int sysctl_tcp_max_ssthresh; | 245 | extern int sysctl_tcp_max_ssthresh; |
246 | extern int sysctl_tcp_cookie_size; | ||
247 | extern int sysctl_tcp_thin_linear_timeouts; | ||
248 | extern int sysctl_tcp_thin_dupack; | ||
240 | 249 | ||
241 | extern atomic_t tcp_memory_allocated; | 250 | extern atomic_t tcp_memory_allocated; |
242 | extern struct percpu_counter tcp_sockets_allocated; | 251 | extern struct percpu_counter tcp_sockets_allocated; |
@@ -343,11 +352,6 @@ static inline void tcp_dec_quickack_mode(struct sock *sk, | |||
343 | 352 | ||
344 | extern void tcp_enter_quickack_mode(struct sock *sk); | 353 | extern void tcp_enter_quickack_mode(struct sock *sk); |
345 | 354 | ||
346 | static inline void tcp_clear_options(struct tcp_options_received *rx_opt) | ||
347 | { | ||
348 | rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0; | ||
349 | } | ||
350 | |||
351 | #define TCP_ECN_OK 1 | 355 | #define TCP_ECN_OK 1 |
352 | #define TCP_ECN_QUEUE_CWR 2 | 356 | #define TCP_ECN_QUEUE_CWR 2 |
353 | #define TCP_ECN_DEMAND_CWR 4 | 357 | #define TCP_ECN_DEMAND_CWR 4 |
@@ -359,8 +363,7 @@ TCP_ECN_create_request(struct request_sock *req, struct tcphdr *th) | |||
359 | inet_rsk(req)->ecn_ok = 1; | 363 | inet_rsk(req)->ecn_ok = 1; |
360 | } | 364 | } |
361 | 365 | ||
362 | enum tcp_tw_status | 366 | enum tcp_tw_status { |
363 | { | ||
364 | TCP_TW_SUCCESS = 0, | 367 | TCP_TW_SUCCESS = 0, |
365 | TCP_TW_RST = 1, | 368 | TCP_TW_RST = 1, |
366 | TCP_TW_ACK = 2, | 369 | TCP_TW_ACK = 2, |
@@ -402,6 +405,8 @@ extern int compat_tcp_setsockopt(struct sock *sk, | |||
402 | int level, int optname, | 405 | int level, int optname, |
403 | char __user *optval, unsigned int optlen); | 406 | char __user *optval, unsigned int optlen); |
404 | extern void tcp_set_keepalive(struct sock *sk, int val); | 407 | extern void tcp_set_keepalive(struct sock *sk, int val); |
408 | extern void tcp_syn_ack_timeout(struct sock *sk, | ||
409 | struct request_sock *req); | ||
405 | extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, | 410 | extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, |
406 | struct msghdr *msg, | 411 | struct msghdr *msg, |
407 | size_t len, int nonblock, | 412 | size_t len, int nonblock, |
@@ -409,6 +414,7 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, | |||
409 | 414 | ||
410 | extern void tcp_parse_options(struct sk_buff *skb, | 415 | extern void tcp_parse_options(struct sk_buff *skb, |
411 | struct tcp_options_received *opt_rx, | 416 | struct tcp_options_received *opt_rx, |
417 | u8 **hvpp, | ||
412 | int estab); | 418 | int estab); |
413 | 419 | ||
414 | extern u8 *tcp_parse_md5sig_option(struct tcphdr *th); | 420 | extern u8 *tcp_parse_md5sig_option(struct tcphdr *th); |
@@ -443,7 +449,8 @@ extern int tcp_connect(struct sock *sk); | |||
443 | 449 | ||
444 | extern struct sk_buff * tcp_make_synack(struct sock *sk, | 450 | extern struct sk_buff * tcp_make_synack(struct sock *sk, |
445 | struct dst_entry *dst, | 451 | struct dst_entry *dst, |
446 | struct request_sock *req); | 452 | struct request_sock *req, |
453 | struct request_values *rvp); | ||
447 | 454 | ||
448 | extern int tcp_disconnect(struct sock *sk, int flags); | 455 | extern int tcp_disconnect(struct sock *sk, int flags); |
449 | 456 | ||
@@ -856,13 +863,6 @@ static inline void tcp_check_probe_timer(struct sock *sk) | |||
856 | icsk->icsk_rto, TCP_RTO_MAX); | 863 | icsk->icsk_rto, TCP_RTO_MAX); |
857 | } | 864 | } |
858 | 865 | ||
859 | static inline void tcp_push_pending_frames(struct sock *sk) | ||
860 | { | ||
861 | struct tcp_sock *tp = tcp_sk(sk); | ||
862 | |||
863 | __tcp_push_pending_frames(sk, tcp_current_mss(sk), tp->nonagle); | ||
864 | } | ||
865 | |||
866 | static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq) | 866 | static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq) |
867 | { | 867 | { |
868 | tp->snd_wl1 = seq; | 868 | tp->snd_wl1 = seq; |
@@ -939,7 +939,7 @@ static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb) | |||
939 | 939 | ||
940 | tp->ucopy.memory = 0; | 940 | tp->ucopy.memory = 0; |
941 | } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { | 941 | } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { |
942 | wake_up_interruptible_poll(sk->sk_sleep, | 942 | wake_up_interruptible_sync_poll(sk->sk_sleep, |
943 | POLLIN | POLLRDNORM | POLLRDBAND); | 943 | POLLIN | POLLRDNORM | POLLRDBAND); |
944 | if (!inet_csk_ack_scheduled(sk)) | 944 | if (!inet_csk_ack_scheduled(sk)) |
945 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, | 945 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, |
@@ -972,7 +972,8 @@ static inline void tcp_sack_reset(struct tcp_options_received *rx_opt) | |||
972 | /* Determine a window scaling and initial window to offer. */ | 972 | /* Determine a window scaling and initial window to offer. */ |
973 | extern void tcp_select_initial_window(int __space, __u32 mss, | 973 | extern void tcp_select_initial_window(int __space, __u32 mss, |
974 | __u32 *rcv_wnd, __u32 *window_clamp, | 974 | __u32 *rcv_wnd, __u32 *window_clamp, |
975 | int wscale_ok, __u8 *rcv_wscale); | 975 | int wscale_ok, __u8 *rcv_wscale, |
976 | __u32 init_rcv_wnd); | ||
976 | 977 | ||
977 | static inline int tcp_win_from_space(int space) | 978 | static inline int tcp_win_from_space(int space) |
978 | { | 979 | { |
@@ -1193,33 +1194,18 @@ extern int tcp_v4_md5_do_del(struct sock *sk, | |||
1193 | #define tcp_twsk_md5_key(twsk) NULL | 1194 | #define tcp_twsk_md5_key(twsk) NULL |
1194 | #endif | 1195 | #endif |
1195 | 1196 | ||
1196 | extern struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(struct sock *); | 1197 | extern struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *); |
1197 | extern void tcp_free_md5sig_pool(void); | 1198 | extern void tcp_free_md5sig_pool(void); |
1198 | 1199 | ||
1199 | extern struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu); | 1200 | extern struct tcp_md5sig_pool *tcp_get_md5sig_pool(void); |
1200 | extern void __tcp_put_md5sig_pool(void); | 1201 | extern void tcp_put_md5sig_pool(void); |
1202 | |||
1201 | extern int tcp_md5_hash_header(struct tcp_md5sig_pool *, struct tcphdr *); | 1203 | extern int tcp_md5_hash_header(struct tcp_md5sig_pool *, struct tcphdr *); |
1202 | extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, struct sk_buff *, | 1204 | extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, struct sk_buff *, |
1203 | unsigned header_len); | 1205 | unsigned header_len); |
1204 | extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, | 1206 | extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, |
1205 | struct tcp_md5sig_key *key); | 1207 | struct tcp_md5sig_key *key); |
1206 | 1208 | ||
1207 | static inline | ||
1208 | struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) | ||
1209 | { | ||
1210 | int cpu = get_cpu(); | ||
1211 | struct tcp_md5sig_pool *ret = __tcp_get_md5sig_pool(cpu); | ||
1212 | if (!ret) | ||
1213 | put_cpu(); | ||
1214 | return ret; | ||
1215 | } | ||
1216 | |||
1217 | static inline void tcp_put_md5sig_pool(void) | ||
1218 | { | ||
1219 | __tcp_put_md5sig_pool(); | ||
1220 | put_cpu(); | ||
1221 | } | ||
1222 | |||
1223 | /* write queue abstraction */ | 1209 | /* write queue abstraction */ |
1224 | static inline void tcp_write_queue_purge(struct sock *sk) | 1210 | static inline void tcp_write_queue_purge(struct sock *sk) |
1225 | { | 1211 | { |
@@ -1228,6 +1214,7 @@ static inline void tcp_write_queue_purge(struct sock *sk) | |||
1228 | while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) | 1214 | while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) |
1229 | sk_wmem_free_skb(sk, skb); | 1215 | sk_wmem_free_skb(sk, skb); |
1230 | sk_mem_reclaim(sk); | 1216 | sk_mem_reclaim(sk); |
1217 | tcp_clear_all_retrans_hints(tcp_sk(sk)); | ||
1231 | } | 1218 | } |
1232 | 1219 | ||
1233 | static inline struct sk_buff *tcp_write_queue_head(struct sock *sk) | 1220 | static inline struct sk_buff *tcp_write_queue_head(struct sock *sk) |
@@ -1259,29 +1246,6 @@ static inline struct sk_buff *tcp_write_queue_prev(struct sock *sk, struct sk_bu | |||
1259 | #define tcp_for_write_queue_from_safe(skb, tmp, sk) \ | 1246 | #define tcp_for_write_queue_from_safe(skb, tmp, sk) \ |
1260 | skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp) | 1247 | skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp) |
1261 | 1248 | ||
1262 | /* This function calculates a "timeout" which is equivalent to the timeout of a | ||
1263 | * TCP connection after "boundary" unsuccessful, exponentially backed-off | ||
1264 | * retransmissions with an initial RTO of TCP_RTO_MIN. | ||
1265 | */ | ||
1266 | static inline bool retransmits_timed_out(const struct sock *sk, | ||
1267 | unsigned int boundary) | ||
1268 | { | ||
1269 | unsigned int timeout, linear_backoff_thresh; | ||
1270 | |||
1271 | if (!inet_csk(sk)->icsk_retransmits) | ||
1272 | return false; | ||
1273 | |||
1274 | linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN); | ||
1275 | |||
1276 | if (boundary <= linear_backoff_thresh) | ||
1277 | timeout = ((2 << boundary) - 1) * TCP_RTO_MIN; | ||
1278 | else | ||
1279 | timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN + | ||
1280 | (boundary - linear_backoff_thresh) * TCP_RTO_MAX; | ||
1281 | |||
1282 | return (tcp_time_stamp - tcp_sk(sk)->retrans_stamp) >= timeout; | ||
1283 | } | ||
1284 | |||
1285 | static inline struct sk_buff *tcp_send_head(struct sock *sk) | 1249 | static inline struct sk_buff *tcp_send_head(struct sock *sk) |
1286 | { | 1250 | { |
1287 | return sk->sk_send_head; | 1251 | return sk->sk_send_head; |
@@ -1364,6 +1328,15 @@ static inline int tcp_write_queue_empty(struct sock *sk) | |||
1364 | return skb_queue_empty(&sk->sk_write_queue); | 1328 | return skb_queue_empty(&sk->sk_write_queue); |
1365 | } | 1329 | } |
1366 | 1330 | ||
1331 | static inline void tcp_push_pending_frames(struct sock *sk) | ||
1332 | { | ||
1333 | if (tcp_send_head(sk)) { | ||
1334 | struct tcp_sock *tp = tcp_sk(sk); | ||
1335 | |||
1336 | __tcp_push_pending_frames(sk, tcp_current_mss(sk), tp->nonagle); | ||
1337 | } | ||
1338 | } | ||
1339 | |||
1367 | /* Start sequence of the highest skb with SACKed bit, valid only if | 1340 | /* Start sequence of the highest skb with SACKed bit, valid only if |
1368 | * sacked > 0 or when the caller has ensured validity by itself. | 1341 | * sacked > 0 or when the caller has ensured validity by itself. |
1369 | */ | 1342 | */ |
@@ -1403,6 +1376,14 @@ static inline void tcp_highest_sack_combine(struct sock *sk, | |||
1403 | tcp_sk(sk)->highest_sack = new; | 1376 | tcp_sk(sk)->highest_sack = new; |
1404 | } | 1377 | } |
1405 | 1378 | ||
1379 | /* Determines whether this is a thin stream (which may suffer from | ||
1380 | * increased latency). Used to trigger latency-reducing mechanisms. | ||
1381 | */ | ||
1382 | static inline unsigned int tcp_stream_is_thin(struct tcp_sock *tp) | ||
1383 | { | ||
1384 | return tp->packets_out < 4 && !tcp_in_initial_slowstart(tp); | ||
1385 | } | ||
1386 | |||
1406 | /* /proc */ | 1387 | /* /proc */ |
1407 | enum tcp_seq_states { | 1388 | enum tcp_seq_states { |
1408 | TCP_SEQ_STATE_LISTENING, | 1389 | TCP_SEQ_STATE_LISTENING, |
@@ -1480,6 +1461,91 @@ struct tcp_request_sock_ops { | |||
1480 | #endif | 1461 | #endif |
1481 | }; | 1462 | }; |
1482 | 1463 | ||
1464 | /* Using SHA1 for now, define some constants. | ||
1465 | */ | ||
1466 | #define COOKIE_DIGEST_WORDS (SHA_DIGEST_WORDS) | ||
1467 | #define COOKIE_MESSAGE_WORDS (SHA_MESSAGE_BYTES / 4) | ||
1468 | #define COOKIE_WORKSPACE_WORDS (COOKIE_DIGEST_WORDS + COOKIE_MESSAGE_WORDS) | ||
1469 | |||
1470 | extern int tcp_cookie_generator(u32 *bakery); | ||
1471 | |||
1472 | /** | ||
1473 | * struct tcp_cookie_values - each socket needs extra space for the | ||
1474 | * cookies, together with (optional) space for any SYN data. | ||
1475 | * | ||
1476 | * A tcp_sock contains a pointer to the current value, and this is | ||
1477 | * cloned to the tcp_timewait_sock. | ||
1478 | * | ||
1479 | * @cookie_pair: variable data from the option exchange. | ||
1480 | * | ||
1481 | * @cookie_desired: user specified tcpct_cookie_desired. Zero | ||
1482 | * indicates default (sysctl_tcp_cookie_size). | ||
1483 | * After cookie sent, remembers size of cookie. | ||
1484 | * Range 0, TCP_COOKIE_MIN to TCP_COOKIE_MAX. | ||
1485 | * | ||
1486 | * @s_data_desired: user specified tcpct_s_data_desired. When the | ||
1487 | * constant payload is specified (@s_data_constant), | ||
1488 | * holds its length instead. | ||
1489 | * Range 0 to TCP_MSS_DESIRED. | ||
1490 | * | ||
1491 | * @s_data_payload: constant data that is to be included in the | ||
1492 | * payload of SYN or SYNACK segments when the | ||
1493 | * cookie option is present. | ||
1494 | */ | ||
1495 | struct tcp_cookie_values { | ||
1496 | struct kref kref; | ||
1497 | u8 cookie_pair[TCP_COOKIE_PAIR_SIZE]; | ||
1498 | u8 cookie_pair_size; | ||
1499 | u8 cookie_desired; | ||
1500 | u16 s_data_desired:11, | ||
1501 | s_data_constant:1, | ||
1502 | s_data_in:1, | ||
1503 | s_data_out:1, | ||
1504 | s_data_unused:2; | ||
1505 | u8 s_data_payload[0]; | ||
1506 | }; | ||
1507 | |||
1508 | static inline void tcp_cookie_values_release(struct kref *kref) | ||
1509 | { | ||
1510 | kfree(container_of(kref, struct tcp_cookie_values, kref)); | ||
1511 | } | ||
1512 | |||
1513 | /* The length of constant payload data. Note that s_data_desired is | ||
1514 | * overloaded, depending on s_data_constant: either the length of constant | ||
1515 | * data (returned here) or the limit on variable data. | ||
1516 | */ | ||
1517 | static inline int tcp_s_data_size(const struct tcp_sock *tp) | ||
1518 | { | ||
1519 | return (tp->cookie_values != NULL && tp->cookie_values->s_data_constant) | ||
1520 | ? tp->cookie_values->s_data_desired | ||
1521 | : 0; | ||
1522 | } | ||
1523 | |||
1524 | /** | ||
1525 | * struct tcp_extend_values - tcp_ipv?.c to tcp_output.c workspace. | ||
1526 | * | ||
1527 | * As tcp_request_sock has already been extended in other places, the | ||
1528 | * only remaining method is to pass stack values along as function | ||
1529 | * parameters. These parameters are not needed after sending SYNACK. | ||
1530 | * | ||
1531 | * @cookie_bakery: cryptographic secret and message workspace. | ||
1532 | * | ||
1533 | * @cookie_plus: bytes in authenticator/cookie option, copied from | ||
1534 | * struct tcp_options_received (above). | ||
1535 | */ | ||
1536 | struct tcp_extend_values { | ||
1537 | struct request_values rv; | ||
1538 | u32 cookie_bakery[COOKIE_WORKSPACE_WORDS]; | ||
1539 | u8 cookie_plus:6, | ||
1540 | cookie_out_never:1, | ||
1541 | cookie_in_always:1; | ||
1542 | }; | ||
1543 | |||
1544 | static inline struct tcp_extend_values *tcp_xv(struct request_values *rvp) | ||
1545 | { | ||
1546 | return (struct tcp_extend_values *)rvp; | ||
1547 | } | ||
1548 | |||
1483 | extern void tcp_v4_init(void); | 1549 | extern void tcp_v4_init(void); |
1484 | extern void tcp_init(void); | 1550 | extern void tcp_init(void); |
1485 | 1551 | ||