diff options
author | Jerry Chu <hkchu@google.com> | 2012-08-31 08:29:11 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-08-31 20:02:18 -0400 |
commit | 1046716368979dee857a2b8a91c4a8833f21b9cb (patch) | |
tree | fdda70278e6fa56c27d242fb1a0ec9b7e3e81d38 | |
parent | 2a35cfa591ac63f17815c2d9432b799e37527980 (diff) |
tcp: TCP Fast Open Server - header & support functions
This patch adds all the necessary data structures and support
functions to implement the TFO server side. It also documents a number
of flags for the sysctl_tcp_fastopen knob, and adds a few Linux
extension MIBs.
In addition, it includes the following:
1. A new TCP_FASTOPEN socket option that an application must set to
supply the max backlog allowed in order to enable TFO on its listener.
2. A number of key data structures:
"fastopen_rsk" in tcp_sock - for a big socket to access its
request_sock for retransmission and ACK processing purposes. It is
non-NULL iff the 3WHS has not completed.
"fastopenq" in request_sock_queue - points to a per Fast Open
listener data structure "fastopen_queue" to keep track of qlen (# of
outstanding Fast Open requests) and max_qlen, among other things.
"listener" in tcp_request_sock - to point to the original listener
for book-keeping purposes, i.e., to maintain qlen against max_qlen
as part of the defense against IP spoofing attacks.
3. Various data structures and functions, many in tcp_fastopen.c, to
support server side Fast Open cookie operations, including
/proc/sys/net/ipv4/tcp_fastopen_key to allow manual rekeying.
Signed-off-by: H.K. Jerry Chu <hkchu@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | Documentation/networking/ip-sysctl.txt | 29 | ||||
-rw-r--r-- | include/linux/snmp.h | 4 | ||||
-rw-r--r-- | include/linux/tcp.h | 45 | ||||
-rw-r--r-- | include/net/request_sock.h | 36 | ||||
-rw-r--r-- | include/net/tcp.h | 46 | ||||
-rw-r--r-- | net/ipv4/proc.c | 4 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 45 | ||||
-rw-r--r-- | net/ipv4/tcp_fastopen.c | 83 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 4 |
9 files changed, 276 insertions, 20 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index d64e53124b8c..c7fc10724948 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt | |||
@@ -467,16 +467,31 @@ tcp_syncookies - BOOLEAN | |||
467 | tcp_fastopen - INTEGER | 467 | tcp_fastopen - INTEGER |
468 | Enable TCP Fast Open feature (draft-ietf-tcpm-fastopen) to send data | 468 | Enable TCP Fast Open feature (draft-ietf-tcpm-fastopen) to send data |
469 | in the opening SYN packet. To use this feature, the client application | 469 | in the opening SYN packet. To use this feature, the client application |
470 | must not use connect(). Instead, it should use sendmsg() or sendto() | 470 | must use sendmsg() or sendto() with MSG_FASTOPEN flag rather than |
471 | with MSG_FASTOPEN flag which performs a TCP handshake automatically. | 471 | connect() to perform a TCP handshake automatically. |
472 | 472 | ||
473 | The values (bitmap) are: | 473 | The values (bitmap) are |
474 | 1: Enables sending data in the opening SYN on the client | 474 | 1: Enables sending data in the opening SYN on the client. |
475 | 5: Enables sending data in the opening SYN on the client regardless | 475 | 2: Enables TCP Fast Open on the server side, i.e., allowing data in |
476 | of cookie availability. | 476 | a SYN packet to be accepted and passed to the application before |
477 | 3-way handshake finishes. | ||
478 | 4: Send data in the opening SYN regardless of cookie availability and | ||
479 | without a cookie option. | ||
480 | 0x100: Accept SYN data w/o validating the cookie. | ||
481 | 0x200: Accept data-in-SYN w/o any cookie option present. | ||
482 | 0x400/0x800: Enable Fast Open on all listeners regardless of the | ||
483 | TCP_FASTOPEN socket option. The two different flags designate two | ||
484 | different ways of setting max_qlen without the TCP_FASTOPEN socket | ||
485 | option. | ||
477 | 486 | ||
478 | Default: 0 | 487 | Default: 0 |
479 | 488 | ||
489 | Note that the client & server side Fast Open flags (1 and 2 | ||
490 | respectively) must also be enabled before the rest of the flags can take | ||
491 | effect. | ||
492 | |||
493 | See include/net/tcp.h and the code for more details. | ||
494 | |||
480 | tcp_syn_retries - INTEGER | 495 | tcp_syn_retries - INTEGER |
481 | Number of times initial SYNs for an active TCP connection attempt | 496 | Number of times initial SYNs for an active TCP connection attempt |
482 | will be retransmitted. Should not be higher than 255. Default value | 497 | will be retransmitted. Should not be higher than 255. Default value |
diff --git a/include/linux/snmp.h b/include/linux/snmp.h index ad6e3a6bf9fb..fdfba235f9f1 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h | |||
@@ -241,6 +241,10 @@ enum | |||
241 | LINUX_MIB_TCPCHALLENGEACK, /* TCPChallengeACK */ | 241 | LINUX_MIB_TCPCHALLENGEACK, /* TCPChallengeACK */ |
242 | LINUX_MIB_TCPSYNCHALLENGE, /* TCPSYNChallenge */ | 242 | LINUX_MIB_TCPSYNCHALLENGE, /* TCPSYNChallenge */ |
243 | LINUX_MIB_TCPFASTOPENACTIVE, /* TCPFastOpenActive */ | 243 | LINUX_MIB_TCPFASTOPENACTIVE, /* TCPFastOpenActive */ |
244 | LINUX_MIB_TCPFASTOPENPASSIVE, /* TCPFastOpenPassive*/ | ||
245 | LINUX_MIB_TCPFASTOPENPASSIVEFAIL, /* TCPFastOpenPassiveFail */ | ||
246 | LINUX_MIB_TCPFASTOPENLISTENOVERFLOW, /* TCPFastOpenListenOverflow */ | ||
247 | LINUX_MIB_TCPFASTOPENCOOKIEREQD, /* TCPFastOpenCookieReqd */ | ||
244 | __LINUX_MIB_MAX | 248 | __LINUX_MIB_MAX |
245 | }; | 249 | }; |
246 | 250 | ||
diff --git a/include/linux/tcp.h b/include/linux/tcp.h index eb125a4c30b3..ae46df590629 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h | |||
@@ -110,6 +110,7 @@ enum { | |||
110 | #define TCP_REPAIR_QUEUE 20 | 110 | #define TCP_REPAIR_QUEUE 20 |
111 | #define TCP_QUEUE_SEQ 21 | 111 | #define TCP_QUEUE_SEQ 21 |
112 | #define TCP_REPAIR_OPTIONS 22 | 112 | #define TCP_REPAIR_OPTIONS 22 |
113 | #define TCP_FASTOPEN 23 /* Enable FastOpen on listeners */ | ||
113 | 114 | ||
114 | struct tcp_repair_opt { | 115 | struct tcp_repair_opt { |
115 | __u32 opt_code; | 116 | __u32 opt_code; |
@@ -246,6 +247,7 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb) | |||
246 | /* TCP Fast Open */ | 247 | /* TCP Fast Open */ |
247 | #define TCP_FASTOPEN_COOKIE_MIN 4 /* Min Fast Open Cookie size in bytes */ | 248 | #define TCP_FASTOPEN_COOKIE_MIN 4 /* Min Fast Open Cookie size in bytes */ |
248 | #define TCP_FASTOPEN_COOKIE_MAX 16 /* Max Fast Open Cookie size in bytes */ | 249 | #define TCP_FASTOPEN_COOKIE_MAX 16 /* Max Fast Open Cookie size in bytes */ |
250 | #define TCP_FASTOPEN_COOKIE_SIZE 8 /* the size employed by this impl. */ | ||
249 | 251 | ||
250 | /* TCP Fast Open Cookie as stored in memory */ | 252 | /* TCP Fast Open Cookie as stored in memory */ |
251 | struct tcp_fastopen_cookie { | 253 | struct tcp_fastopen_cookie { |
@@ -312,9 +314,14 @@ struct tcp_request_sock { | |||
312 | /* Only used by TCP MD5 Signature so far. */ | 314 | /* Only used by TCP MD5 Signature so far. */ |
313 | const struct tcp_request_sock_ops *af_specific; | 315 | const struct tcp_request_sock_ops *af_specific; |
314 | #endif | 316 | #endif |
317 | struct sock *listener; /* needed for TFO */ | ||
315 | u32 rcv_isn; | 318 | u32 rcv_isn; |
316 | u32 snt_isn; | 319 | u32 snt_isn; |
317 | u32 snt_synack; /* synack sent time */ | 320 | u32 snt_synack; /* synack sent time */ |
321 | u32 rcv_nxt; /* the ack # by SYNACK. For | ||
322 | * FastOpen it's the seq# | ||
323 | * after data-in-SYN. | ||
324 | */ | ||
318 | }; | 325 | }; |
319 | 326 | ||
320 | static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req) | 327 | static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req) |
@@ -505,14 +512,18 @@ struct tcp_sock { | |||
505 | struct tcp_md5sig_info __rcu *md5sig_info; | 512 | struct tcp_md5sig_info __rcu *md5sig_info; |
506 | #endif | 513 | #endif |
507 | 514 | ||
508 | /* TCP fastopen related information */ | ||
509 | struct tcp_fastopen_request *fastopen_req; | ||
510 | |||
511 | /* When the cookie options are generated and exchanged, then this | 515 | /* When the cookie options are generated and exchanged, then this |
512 | * object holds a reference to them (cookie_values->kref). Also | 516 | * object holds a reference to them (cookie_values->kref). Also |
513 | * contains related tcp_cookie_transactions fields. | 517 | * contains related tcp_cookie_transactions fields. |
514 | */ | 518 | */ |
515 | struct tcp_cookie_values *cookie_values; | 519 | struct tcp_cookie_values *cookie_values; |
520 | |||
521 | /* TCP fastopen related information */ | ||
522 | struct tcp_fastopen_request *fastopen_req; | ||
523 | /* fastopen_rsk points to request_sock that resulted in this big | ||
524 | * socket. Used to retransmit SYNACKs etc. | ||
525 | */ | ||
526 | struct request_sock *fastopen_rsk; | ||
516 | }; | 527 | }; |
517 | 528 | ||
518 | enum tsq_flags { | 529 | enum tsq_flags { |
@@ -552,6 +563,34 @@ static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) | |||
552 | return (struct tcp_timewait_sock *)sk; | 563 | return (struct tcp_timewait_sock *)sk; |
553 | } | 564 | } |
554 | 565 | ||
566 | static inline bool tcp_passive_fastopen(const struct sock *sk) | ||
567 | { | ||
568 | return (sk->sk_state == TCP_SYN_RECV && | ||
569 | tcp_sk(sk)->fastopen_rsk != NULL); | ||
570 | } | ||
571 | |||
572 | static inline bool fastopen_cookie_present(struct tcp_fastopen_cookie *foc) | ||
573 | { | ||
574 | return foc->len != -1; | ||
575 | } | ||
576 | |||
577 | static inline int fastopen_init_queue(struct sock *sk, int backlog) | ||
578 | { | ||
579 | struct request_sock_queue *queue = | ||
580 | &inet_csk(sk)->icsk_accept_queue; | ||
581 | |||
582 | if (queue->fastopenq == NULL) { | ||
583 | queue->fastopenq = kzalloc( | ||
584 | sizeof(struct fastopen_queue), | ||
585 | sk->sk_allocation); | ||
586 | if (queue->fastopenq == NULL) | ||
587 | return -ENOMEM; | ||
588 | spin_lock_init(&queue->fastopenq->lock); | ||
589 | } | ||
590 | queue->fastopenq->max_qlen = backlog; | ||
591 | return 0; | ||
592 | } | ||
593 | |||
555 | #endif /* __KERNEL__ */ | 594 | #endif /* __KERNEL__ */ |
556 | 595 | ||
557 | #endif /* _LINUX_TCP_H */ | 596 | #endif /* _LINUX_TCP_H */ |
diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 4c0766e201e3..c3cdd6c9f448 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h | |||
@@ -106,6 +106,34 @@ struct listen_sock { | |||
106 | struct request_sock *syn_table[0]; | 106 | struct request_sock *syn_table[0]; |
107 | }; | 107 | }; |
108 | 108 | ||
109 | /* | ||
110 | * For a TCP Fast Open listener - | ||
111 | * lock - protects the access to all the reqsk, which is co-owned by | ||
112 | * the listener and the child socket. | ||
113 | * qlen - pending TFO requests (still in TCP_SYN_RECV). | ||
114 | * max_qlen - max TFO reqs allowed before TFO is disabled. | ||
115 | * | ||
116 | * XXX (TFO) - ideally these fields can be made as part of "listen_sock" | ||
117 | * structure above. But there is some implementation difficulty due to | ||
118 | * listen_sock being part of request_sock_queue hence will be freed when | ||
119 | * a listener is stopped. But TFO related fields may continue to be | ||
120 | * accessed even after a listener is closed, until its sk_refcnt drops | ||
121 | * to 0 implying no more outstanding TFO reqs. One solution is to keep | ||
122 | * listen_opt around until sk_refcnt drops to 0. But there is some other | ||
123 | * complexity that needs to be resolved. E.g., a listener can be disabled | ||
124 | * temporarily through shutdown()->tcp_disconnect(), and re-enabled later. | ||
125 | */ | ||
126 | struct fastopen_queue { | ||
127 | struct request_sock *rskq_rst_head; /* Keep track of past TFO */ | ||
128 | struct request_sock *rskq_rst_tail; /* requests that caused RST. | ||
129 | * This is part of the defense | ||
130 | * against spoofing attack. | ||
131 | */ | ||
132 | spinlock_t lock; | ||
133 | int qlen; /* # of pending (TCP_SYN_RECV) reqs */ | ||
134 | int max_qlen; /* != 0 iff TFO is currently enabled */ | ||
135 | }; | ||
136 | |||
109 | /** struct request_sock_queue - queue of request_socks | 137 | /** struct request_sock_queue - queue of request_socks |
110 | * | 138 | * |
111 | * @rskq_accept_head - FIFO head of established children | 139 | * @rskq_accept_head - FIFO head of established children |
@@ -129,6 +157,12 @@ struct request_sock_queue { | |||
129 | u8 rskq_defer_accept; | 157 | u8 rskq_defer_accept; |
130 | /* 3 bytes hole, try to pack */ | 158 | /* 3 bytes hole, try to pack */ |
131 | struct listen_sock *listen_opt; | 159 | struct listen_sock *listen_opt; |
160 | struct fastopen_queue *fastopenq; /* This is non-NULL iff TFO has been | ||
161 | * enabled on this listener. Check | ||
162 | * max_qlen != 0 in fastopen_queue | ||
163 | * to determine if TFO is enabled | ||
164 | * right at this moment. | ||
165 | */ | ||
132 | }; | 166 | }; |
133 | 167 | ||
134 | extern int reqsk_queue_alloc(struct request_sock_queue *queue, | 168 | extern int reqsk_queue_alloc(struct request_sock_queue *queue, |
@@ -136,6 +170,8 @@ extern int reqsk_queue_alloc(struct request_sock_queue *queue, | |||
136 | 170 | ||
137 | extern void __reqsk_queue_destroy(struct request_sock_queue *queue); | 171 | extern void __reqsk_queue_destroy(struct request_sock_queue *queue); |
138 | extern void reqsk_queue_destroy(struct request_sock_queue *queue); | 172 | extern void reqsk_queue_destroy(struct request_sock_queue *queue); |
173 | extern void reqsk_fastopen_remove(struct sock *sk, | ||
174 | struct request_sock *req, bool reset); | ||
139 | 175 | ||
140 | static inline struct request_sock * | 176 | static inline struct request_sock * |
141 | reqsk_queue_yank_acceptq(struct request_sock_queue *queue) | 177 | reqsk_queue_yank_acceptq(struct request_sock_queue *queue) |
diff --git a/include/net/tcp.h b/include/net/tcp.h index 0fca06f16463..9f8821e3293a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h | |||
@@ -224,8 +224,24 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); | |||
224 | 224 | ||
225 | /* Bit Flags for sysctl_tcp_fastopen */ | 225 | /* Bit Flags for sysctl_tcp_fastopen */ |
226 | #define TFO_CLIENT_ENABLE 1 | 226 | #define TFO_CLIENT_ENABLE 1 |
227 | #define TFO_SERVER_ENABLE 2 | ||
227 | #define TFO_CLIENT_NO_COOKIE 4 /* Data in SYN w/o cookie option */ | 228 | #define TFO_CLIENT_NO_COOKIE 4 /* Data in SYN w/o cookie option */ |
228 | 229 | ||
230 | /* Process SYN data but skip cookie validation */ | ||
231 | #define TFO_SERVER_COOKIE_NOT_CHKED 0x100 | ||
232 | /* Accept SYN data w/o any cookie option */ | ||
233 | #define TFO_SERVER_COOKIE_NOT_REQD 0x200 | ||
234 | |||
235 | /* Force enable TFO on all listeners, i.e., not requiring the | ||
236 | * TCP_FASTOPEN socket option. SOCKOPT1/2 determine how to set max_qlen. | ||
237 | */ | ||
238 | #define TFO_SERVER_WO_SOCKOPT1 0x400 | ||
239 | #define TFO_SERVER_WO_SOCKOPT2 0x800 | ||
240 | /* Always create TFO child sockets on a TFO listener even when | ||
241 | * cookie/data not present. (For testing purpose!) | ||
242 | */ | ||
243 | #define TFO_SERVER_ALWAYS 0x1000 | ||
244 | |||
229 | extern struct inet_timewait_death_row tcp_death_row; | 245 | extern struct inet_timewait_death_row tcp_death_row; |
230 | 246 | ||
231 | /* sysctl variables for tcp */ | 247 | /* sysctl variables for tcp */ |
@@ -421,12 +437,6 @@ extern void tcp_metrics_init(void); | |||
421 | extern bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool paws_check); | 437 | extern bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool paws_check); |
422 | extern bool tcp_remember_stamp(struct sock *sk); | 438 | extern bool tcp_remember_stamp(struct sock *sk); |
423 | extern bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw); | 439 | extern bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw); |
424 | extern void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, | ||
425 | struct tcp_fastopen_cookie *cookie, | ||
426 | int *syn_loss, unsigned long *last_syn_loss); | ||
427 | extern void tcp_fastopen_cache_set(struct sock *sk, u16 mss, | ||
428 | struct tcp_fastopen_cookie *cookie, | ||
429 | bool syn_lost); | ||
430 | extern void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst); | 440 | extern void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst); |
431 | extern void tcp_disable_fack(struct tcp_sock *tp); | 441 | extern void tcp_disable_fack(struct tcp_sock *tp); |
432 | extern void tcp_close(struct sock *sk, long timeout); | 442 | extern void tcp_close(struct sock *sk, long timeout); |
@@ -537,6 +547,7 @@ extern void tcp_send_delayed_ack(struct sock *sk); | |||
537 | extern void tcp_cwnd_application_limited(struct sock *sk); | 547 | extern void tcp_cwnd_application_limited(struct sock *sk); |
538 | extern void tcp_resume_early_retransmit(struct sock *sk); | 548 | extern void tcp_resume_early_retransmit(struct sock *sk); |
539 | extern void tcp_rearm_rto(struct sock *sk); | 549 | extern void tcp_rearm_rto(struct sock *sk); |
550 | extern void tcp_reset(struct sock *sk); | ||
540 | 551 | ||
541 | /* tcp_timer.c */ | 552 | /* tcp_timer.c */ |
542 | extern void tcp_init_xmit_timers(struct sock *); | 553 | extern void tcp_init_xmit_timers(struct sock *); |
@@ -586,6 +597,7 @@ extern int tcp_mtu_to_mss(struct sock *sk, int pmtu); | |||
586 | extern int tcp_mss_to_mtu(struct sock *sk, int mss); | 597 | extern int tcp_mss_to_mtu(struct sock *sk, int mss); |
587 | extern void tcp_mtup_init(struct sock *sk); | 598 | extern void tcp_mtup_init(struct sock *sk); |
588 | extern void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt); | 599 | extern void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt); |
600 | extern void tcp_init_buffer_space(struct sock *sk); | ||
589 | 601 | ||
590 | static inline void tcp_bound_rto(const struct sock *sk) | 602 | static inline void tcp_bound_rto(const struct sock *sk) |
591 | { | 603 | { |
@@ -1104,6 +1116,7 @@ static inline void tcp_openreq_init(struct request_sock *req, | |||
1104 | req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */ | 1116 | req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */ |
1105 | req->cookie_ts = 0; | 1117 | req->cookie_ts = 0; |
1106 | tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq; | 1118 | tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq; |
1119 | tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; | ||
1107 | req->mss = rx_opt->mss_clamp; | 1120 | req->mss = rx_opt->mss_clamp; |
1108 | req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0; | 1121 | req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0; |
1109 | ireq->tstamp_ok = rx_opt->tstamp_ok; | 1122 | ireq->tstamp_ok = rx_opt->tstamp_ok; |
@@ -1308,15 +1321,34 @@ extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff | |||
1308 | extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, | 1321 | extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, |
1309 | const struct tcp_md5sig_key *key); | 1322 | const struct tcp_md5sig_key *key); |
1310 | 1323 | ||
1324 | /* From tcp_fastopen.c */ | ||
1325 | extern void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, | ||
1326 | struct tcp_fastopen_cookie *cookie, | ||
1327 | int *syn_loss, unsigned long *last_syn_loss); | ||
1328 | extern void tcp_fastopen_cache_set(struct sock *sk, u16 mss, | ||
1329 | struct tcp_fastopen_cookie *cookie, | ||
1330 | bool syn_lost); | ||
1311 | struct tcp_fastopen_request { | 1331 | struct tcp_fastopen_request { |
1312 | /* Fast Open cookie. Size 0 means a cookie request */ | 1332 | /* Fast Open cookie. Size 0 means a cookie request */ |
1313 | struct tcp_fastopen_cookie cookie; | 1333 | struct tcp_fastopen_cookie cookie; |
1314 | struct msghdr *data; /* data in MSG_FASTOPEN */ | 1334 | struct msghdr *data; /* data in MSG_FASTOPEN */ |
1315 | u16 copied; /* queued in tcp_connect() */ | 1335 | u16 copied; /* queued in tcp_connect() */ |
1316 | }; | 1336 | }; |
1317 | |||
1318 | void tcp_free_fastopen_req(struct tcp_sock *tp); | 1337 | void tcp_free_fastopen_req(struct tcp_sock *tp); |
1319 | 1338 | ||
1339 | extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; | ||
1340 | int tcp_fastopen_reset_cipher(void *key, unsigned int len); | ||
1341 | void tcp_fastopen_cookie_gen(__be32 addr, struct tcp_fastopen_cookie *foc); | ||
1342 | |||
1343 | #define TCP_FASTOPEN_KEY_LENGTH 16 | ||
1344 | |||
1345 | /* Fastopen key context */ | ||
1346 | struct tcp_fastopen_context { | ||
1347 | struct crypto_cipher __rcu *tfm; | ||
1348 | __u8 key[TCP_FASTOPEN_KEY_LENGTH]; | ||
1349 | struct rcu_head rcu; | ||
1350 | }; | ||
1351 | |||
1320 | /* write queue abstraction */ | 1352 | /* write queue abstraction */ |
1321 | static inline void tcp_write_queue_purge(struct sock *sk) | 1353 | static inline void tcp_write_queue_purge(struct sock *sk) |
1322 | { | 1354 | { |
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 957acd12250b..8de53e1ddd54 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c | |||
@@ -263,6 +263,10 @@ static const struct snmp_mib snmp4_net_list[] = { | |||
263 | SNMP_MIB_ITEM("TCPChallengeACK", LINUX_MIB_TCPCHALLENGEACK), | 263 | SNMP_MIB_ITEM("TCPChallengeACK", LINUX_MIB_TCPCHALLENGEACK), |
264 | SNMP_MIB_ITEM("TCPSYNChallenge", LINUX_MIB_TCPSYNCHALLENGE), | 264 | SNMP_MIB_ITEM("TCPSYNChallenge", LINUX_MIB_TCPSYNCHALLENGE), |
265 | SNMP_MIB_ITEM("TCPFastOpenActive", LINUX_MIB_TCPFASTOPENACTIVE), | 265 | SNMP_MIB_ITEM("TCPFastOpenActive", LINUX_MIB_TCPFASTOPENACTIVE), |
266 | SNMP_MIB_ITEM("TCPFastOpenPassive", LINUX_MIB_TCPFASTOPENPASSIVE), | ||
267 | SNMP_MIB_ITEM("TCPFastOpenPassiveFail", LINUX_MIB_TCPFASTOPENPASSIVEFAIL), | ||
268 | SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW), | ||
269 | SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD), | ||
266 | SNMP_MIB_SENTINEL | 270 | SNMP_MIB_SENTINEL |
267 | }; | 271 | }; |
268 | 272 | ||
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 3e78c79b5586..9205e492dc9d 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -232,6 +232,45 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, | |||
232 | return 0; | 232 | return 0; |
233 | } | 233 | } |
234 | 234 | ||
235 | int proc_tcp_fastopen_key(ctl_table *ctl, int write, void __user *buffer, | ||
236 | size_t *lenp, loff_t *ppos) | ||
237 | { | ||
238 | ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; | ||
239 | struct tcp_fastopen_context *ctxt; | ||
240 | int ret; | ||
241 | u32 user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */ | ||
242 | |||
243 | tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL); | ||
244 | if (!tbl.data) | ||
245 | return -ENOMEM; | ||
246 | |||
247 | rcu_read_lock(); | ||
248 | ctxt = rcu_dereference(tcp_fastopen_ctx); | ||
249 | if (ctxt) | ||
250 | memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH); | ||
251 | rcu_read_unlock(); | ||
252 | |||
253 | snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x", | ||
254 | user_key[0], user_key[1], user_key[2], user_key[3]); | ||
255 | ret = proc_dostring(&tbl, write, buffer, lenp, ppos); | ||
256 | |||
257 | if (write && ret == 0) { | ||
258 | if (sscanf(tbl.data, "%x-%x-%x-%x", user_key, user_key + 1, | ||
259 | user_key + 2, user_key + 3) != 4) { | ||
260 | ret = -EINVAL; | ||
261 | goto bad_key; | ||
262 | } | ||
263 | tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH); | ||
264 | } | ||
265 | |||
266 | bad_key: | ||
267 | pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n", | ||
268 | user_key[0], user_key[1], user_key[2], user_key[3], | ||
269 | (char *)tbl.data, ret); | ||
270 | kfree(tbl.data); | ||
271 | return ret; | ||
272 | } | ||
273 | |||
235 | static struct ctl_table ipv4_table[] = { | 274 | static struct ctl_table ipv4_table[] = { |
236 | { | 275 | { |
237 | .procname = "tcp_timestamps", | 276 | .procname = "tcp_timestamps", |
@@ -386,6 +425,12 @@ static struct ctl_table ipv4_table[] = { | |||
386 | .proc_handler = proc_dointvec, | 425 | .proc_handler = proc_dointvec, |
387 | }, | 426 | }, |
388 | { | 427 | { |
428 | .procname = "tcp_fastopen_key", | ||
429 | .mode = 0600, | ||
430 | .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10), | ||
431 | .proc_handler = proc_tcp_fastopen_key, | ||
432 | }, | ||
433 | { | ||
389 | .procname = "tcp_tw_recycle", | 434 | .procname = "tcp_tw_recycle", |
390 | .data = &tcp_death_row.sysctl_tw_recycle, | 435 | .data = &tcp_death_row.sysctl_tw_recycle, |
391 | .maxlen = sizeof(int), | 436 | .maxlen = sizeof(int), |
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index a7f729c409d7..8f7ef0ad80e5 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c | |||
@@ -1,10 +1,91 @@ | |||
1 | #include <linux/err.h> | ||
1 | #include <linux/init.h> | 2 | #include <linux/init.h> |
2 | #include <linux/kernel.h> | 3 | #include <linux/kernel.h> |
4 | #include <linux/list.h> | ||
5 | #include <linux/tcp.h> | ||
6 | #include <linux/rcupdate.h> | ||
7 | #include <linux/rculist.h> | ||
8 | #include <net/inetpeer.h> | ||
9 | #include <net/tcp.h> | ||
3 | 10 | ||
4 | int sysctl_tcp_fastopen; | 11 | int sysctl_tcp_fastopen __read_mostly; |
12 | |||
13 | struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; | ||
14 | |||
15 | static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock); | ||
16 | |||
17 | static void tcp_fastopen_ctx_free(struct rcu_head *head) | ||
18 | { | ||
19 | struct tcp_fastopen_context *ctx = | ||
20 | container_of(head, struct tcp_fastopen_context, rcu); | ||
21 | crypto_free_cipher(ctx->tfm); | ||
22 | kfree(ctx); | ||
23 | } | ||
24 | |||
25 | int tcp_fastopen_reset_cipher(void *key, unsigned int len) | ||
26 | { | ||
27 | int err; | ||
28 | struct tcp_fastopen_context *ctx, *octx; | ||
29 | |||
30 | ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); | ||
31 | if (!ctx) | ||
32 | return -ENOMEM; | ||
33 | ctx->tfm = crypto_alloc_cipher("aes", 0, 0); | ||
34 | |||
35 | if (IS_ERR(ctx->tfm)) { | ||
36 | err = PTR_ERR(ctx->tfm); | ||
37 | error: kfree(ctx); | ||
38 | pr_err("TCP: TFO aes cipher alloc error: %d\n", err); | ||
39 | return err; | ||
40 | } | ||
41 | err = crypto_cipher_setkey(ctx->tfm, key, len); | ||
42 | if (err) { | ||
43 | pr_err("TCP: TFO cipher key error: %d\n", err); | ||
44 | crypto_free_cipher(ctx->tfm); | ||
45 | goto error; | ||
46 | } | ||
47 | memcpy(ctx->key, key, len); | ||
48 | |||
49 | spin_lock(&tcp_fastopen_ctx_lock); | ||
50 | |||
51 | octx = rcu_dereference_protected(tcp_fastopen_ctx, | ||
52 | lockdep_is_held(&tcp_fastopen_ctx_lock)); | ||
53 | rcu_assign_pointer(tcp_fastopen_ctx, ctx); | ||
54 | spin_unlock(&tcp_fastopen_ctx_lock); | ||
55 | |||
56 | if (octx) | ||
57 | call_rcu(&octx->rcu, tcp_fastopen_ctx_free); | ||
58 | return err; | ||
59 | } | ||
60 | |||
61 | /* Computes the fastopen cookie for the peer. | ||
62 | * The peer address is 128 bits long (padded with zeros for IPv4). | ||
63 | * | ||
64 | * The caller must check foc->len to determine if a valid cookie | ||
65 | * has been generated successfully. | ||
66 | */ | ||
67 | void tcp_fastopen_cookie_gen(__be32 addr, struct tcp_fastopen_cookie *foc) | ||
68 | { | ||
69 | __be32 peer_addr[4] = { addr, 0, 0, 0 }; | ||
70 | struct tcp_fastopen_context *ctx; | ||
71 | |||
72 | rcu_read_lock(); | ||
73 | ctx = rcu_dereference(tcp_fastopen_ctx); | ||
74 | if (ctx) { | ||
75 | crypto_cipher_encrypt_one(ctx->tfm, | ||
76 | foc->val, | ||
77 | (__u8 *)peer_addr); | ||
78 | foc->len = TCP_FASTOPEN_COOKIE_SIZE; | ||
79 | } | ||
80 | rcu_read_unlock(); | ||
81 | } | ||
5 | 82 | ||
6 | static int __init tcp_fastopen_init(void) | 83 | static int __init tcp_fastopen_init(void) |
7 | { | 84 | { |
85 | __u8 key[TCP_FASTOPEN_KEY_LENGTH]; | ||
86 | |||
87 | get_random_bytes(key, sizeof(key)); | ||
88 | tcp_fastopen_reset_cipher(key, sizeof(key)); | ||
8 | return 0; | 89 | return 0; |
9 | } | 90 | } |
10 | 91 | ||
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ce4ffe9ed556..d47d5fe8f3f0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -378,7 +378,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk) | |||
378 | /* 4. Try to fixup all. It is made immediately after connection enters | 378 | /* 4. Try to fixup all. It is made immediately after connection enters |
379 | * established state. | 379 | * established state. |
380 | */ | 380 | */ |
381 | static void tcp_init_buffer_space(struct sock *sk) | 381 | void tcp_init_buffer_space(struct sock *sk) |
382 | { | 382 | { |
383 | struct tcp_sock *tp = tcp_sk(sk); | 383 | struct tcp_sock *tp = tcp_sk(sk); |
384 | int maxwin; | 384 | int maxwin; |
@@ -4038,7 +4038,7 @@ static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq) | |||
4038 | } | 4038 | } |
4039 | 4039 | ||
4040 | /* When we get a reset we do this. */ | 4040 | /* When we get a reset we do this. */ |
4041 | static void tcp_reset(struct sock *sk) | 4041 | void tcp_reset(struct sock *sk) |
4042 | { | 4042 | { |
4043 | /* We want the right error as BSD sees it (and indeed as we do). */ | 4043 | /* We want the right error as BSD sees it (and indeed as we do). */ |
4044 | switch (sk->sk_state) { | 4044 | switch (sk->sk_state) { |