diff options
author | Jerry Chu <hkchu@google.com> | 2012-08-31 08:29:11 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-08-31 20:02:18 -0400 |
commit | 1046716368979dee857a2b8a91c4a8833f21b9cb (patch) | |
tree | fdda70278e6fa56c27d242fb1a0ec9b7e3e81d38 /net | |
parent | 2a35cfa591ac63f17815c2d9432b799e37527980 (diff) |
tcp: TCP Fast Open Server - header & support functions
This patch adds all the necessary data structures and support
functions to implement the TFO server side. It also documents a number
of flags for the sysctl_tcp_fastopen knob, and adds a few Linux
extension MIBs.
In addition, it includes the following:
1. A new TCP_FASTOPEN socket option that an application must set to
supply the maximum backlog allowed, in order to enable TFO on its listener.
2. A number of key data structures:
"fastopen_rsk" in tcp_sock - for a big socket to access its
request_sock for retransmission and ack processing purposes. It is
non-NULL iff the 3WHS has not completed.
"fastopenq" in request_sock_queue - points to a per Fast Open
listener data structure "fastopen_queue" to keep track of qlen (# of
outstanding Fast Open requests) and max_qlen, among other things.
"listener" in tcp_request_sock - to point to the original listener
for book-keeping purposes, i.e., to maintain qlen against max_qlen
as part of the defense against IP spoofing attacks.
3. Various data structures and functions, many in tcp_fastopen.c, to
support server side Fast Open cookie operations, including
/proc/sys/net/ipv4/tcp_fastopen_key to allow manual rekeying.
Signed-off-by: H.K. Jerry Chu <hkchu@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/ipv4/proc.c | 4 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 45 | ||||
-rw-r--r-- | net/ipv4/tcp_fastopen.c | 83 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 4 |
4 files changed, 133 insertions, 3 deletions
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 957acd12250b..8de53e1ddd54 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c | |||
@@ -263,6 +263,10 @@ static const struct snmp_mib snmp4_net_list[] = { | |||
263 | SNMP_MIB_ITEM("TCPChallengeACK", LINUX_MIB_TCPCHALLENGEACK), | 263 | SNMP_MIB_ITEM("TCPChallengeACK", LINUX_MIB_TCPCHALLENGEACK), |
264 | SNMP_MIB_ITEM("TCPSYNChallenge", LINUX_MIB_TCPSYNCHALLENGE), | 264 | SNMP_MIB_ITEM("TCPSYNChallenge", LINUX_MIB_TCPSYNCHALLENGE), |
265 | SNMP_MIB_ITEM("TCPFastOpenActive", LINUX_MIB_TCPFASTOPENACTIVE), | 265 | SNMP_MIB_ITEM("TCPFastOpenActive", LINUX_MIB_TCPFASTOPENACTIVE), |
266 | SNMP_MIB_ITEM("TCPFastOpenPassive", LINUX_MIB_TCPFASTOPENPASSIVE), | ||
267 | SNMP_MIB_ITEM("TCPFastOpenPassiveFail", LINUX_MIB_TCPFASTOPENPASSIVEFAIL), | ||
268 | SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW), | ||
269 | SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD), | ||
266 | SNMP_MIB_SENTINEL | 270 | SNMP_MIB_SENTINEL |
267 | }; | 271 | }; |
268 | 272 | ||
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 3e78c79b5586..9205e492dc9d 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -232,6 +232,45 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, | |||
232 | return 0; | 232 | return 0; |
233 | } | 233 | } |
234 | 234 | ||
235 | int proc_tcp_fastopen_key(ctl_table *ctl, int write, void __user *buffer, | ||
236 | size_t *lenp, loff_t *ppos) | ||
237 | { | ||
238 | ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; | ||
239 | struct tcp_fastopen_context *ctxt; | ||
240 | int ret; | ||
241 | u32 user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */ | ||
242 | |||
243 | tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL); | ||
244 | if (!tbl.data) | ||
245 | return -ENOMEM; | ||
246 | |||
247 | rcu_read_lock(); | ||
248 | ctxt = rcu_dereference(tcp_fastopen_ctx); | ||
249 | if (ctxt) | ||
250 | memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH); | ||
251 | rcu_read_unlock(); | ||
252 | |||
253 | snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x", | ||
254 | user_key[0], user_key[1], user_key[2], user_key[3]); | ||
255 | ret = proc_dostring(&tbl, write, buffer, lenp, ppos); | ||
256 | |||
257 | if (write && ret == 0) { | ||
258 | if (sscanf(tbl.data, "%x-%x-%x-%x", user_key, user_key + 1, | ||
259 | user_key + 2, user_key + 3) != 4) { | ||
260 | ret = -EINVAL; | ||
261 | goto bad_key; | ||
262 | } | ||
263 | tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH); | ||
264 | } | ||
265 | |||
266 | bad_key: | ||
267 | pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n", | ||
268 | user_key[0], user_key[1], user_key[2], user_key[3], | ||
269 | (char *)tbl.data, ret); | ||
270 | kfree(tbl.data); | ||
271 | return ret; | ||
272 | } | ||
273 | |||
235 | static struct ctl_table ipv4_table[] = { | 274 | static struct ctl_table ipv4_table[] = { |
236 | { | 275 | { |
237 | .procname = "tcp_timestamps", | 276 | .procname = "tcp_timestamps", |
@@ -386,6 +425,12 @@ static struct ctl_table ipv4_table[] = { | |||
386 | .proc_handler = proc_dointvec, | 425 | .proc_handler = proc_dointvec, |
387 | }, | 426 | }, |
388 | { | 427 | { |
428 | .procname = "tcp_fastopen_key", | ||
429 | .mode = 0600, | ||
430 | .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10), | ||
431 | .proc_handler = proc_tcp_fastopen_key, | ||
432 | }, | ||
433 | { | ||
389 | .procname = "tcp_tw_recycle", | 434 | .procname = "tcp_tw_recycle", |
390 | .data = &tcp_death_row.sysctl_tw_recycle, | 435 | .data = &tcp_death_row.sysctl_tw_recycle, |
391 | .maxlen = sizeof(int), | 436 | .maxlen = sizeof(int), |
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index a7f729c409d7..8f7ef0ad80e5 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c | |||
@@ -1,10 +1,91 @@ | |||
1 | #include <linux/err.h> | ||
1 | #include <linux/init.h> | 2 | #include <linux/init.h> |
2 | #include <linux/kernel.h> | 3 | #include <linux/kernel.h> |
4 | #include <linux/list.h> | ||
5 | #include <linux/tcp.h> | ||
6 | #include <linux/rcupdate.h> | ||
7 | #include <linux/rculist.h> | ||
8 | #include <net/inetpeer.h> | ||
9 | #include <net/tcp.h> | ||
3 | 10 | ||
4 | int sysctl_tcp_fastopen; | 11 | int sysctl_tcp_fastopen __read_mostly; |
12 | |||
/* Current Fast Open context (cipher handle and key).  Readers in this file
 * fetch it with rcu_dereference() inside rcu_read_lock(); the updater swaps
 * it with rcu_assign_pointer() while holding tcp_fastopen_ctx_lock.
 */
13 | struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; | ||
14 | |||
/* Held while tcp_fastopen_ctx is being replaced. */
15 | static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock); | ||
16 | |||
17 | static void tcp_fastopen_ctx_free(struct rcu_head *head) | ||
18 | { | ||
19 | struct tcp_fastopen_context *ctx = | ||
20 | container_of(head, struct tcp_fastopen_context, rcu); | ||
21 | crypto_free_cipher(ctx->tfm); | ||
22 | kfree(ctx); | ||
23 | } | ||
24 | |||
25 | int tcp_fastopen_reset_cipher(void *key, unsigned int len) | ||
26 | { | ||
27 | int err; | ||
28 | struct tcp_fastopen_context *ctx, *octx; | ||
29 | |||
30 | ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); | ||
31 | if (!ctx) | ||
32 | return -ENOMEM; | ||
33 | ctx->tfm = crypto_alloc_cipher("aes", 0, 0); | ||
34 | |||
35 | if (IS_ERR(ctx->tfm)) { | ||
36 | err = PTR_ERR(ctx->tfm); | ||
37 | error: kfree(ctx); | ||
38 | pr_err("TCP: TFO aes cipher alloc error: %d\n", err); | ||
39 | return err; | ||
40 | } | ||
41 | err = crypto_cipher_setkey(ctx->tfm, key, len); | ||
42 | if (err) { | ||
43 | pr_err("TCP: TFO cipher key error: %d\n", err); | ||
44 | crypto_free_cipher(ctx->tfm); | ||
45 | goto error; | ||
46 | } | ||
47 | memcpy(ctx->key, key, len); | ||
48 | |||
49 | spin_lock(&tcp_fastopen_ctx_lock); | ||
50 | |||
51 | octx = rcu_dereference_protected(tcp_fastopen_ctx, | ||
52 | lockdep_is_held(&tcp_fastopen_ctx_lock)); | ||
53 | rcu_assign_pointer(tcp_fastopen_ctx, ctx); | ||
54 | spin_unlock(&tcp_fastopen_ctx_lock); | ||
55 | |||
56 | if (octx) | ||
57 | call_rcu(&octx->rcu, tcp_fastopen_ctx_free); | ||
58 | return err; | ||
59 | } | ||
60 | |||
61 | /* Computes the fastopen cookie for the peer. | ||
62 | * The peer address is a 128 bits long (pad with zeros for IPv4). | ||
63 | * | ||
64 | * The caller must check foc->len to determine if a valid cookie | ||
65 | * has been generated successfully. | ||
66 | */ | ||
67 | void tcp_fastopen_cookie_gen(__be32 addr, struct tcp_fastopen_cookie *foc) | ||
68 | { | ||
69 | __be32 peer_addr[4] = { addr, 0, 0, 0 }; | ||
70 | struct tcp_fastopen_context *ctx; | ||
71 | |||
72 | rcu_read_lock(); | ||
73 | ctx = rcu_dereference(tcp_fastopen_ctx); | ||
74 | if (ctx) { | ||
75 | crypto_cipher_encrypt_one(ctx->tfm, | ||
76 | foc->val, | ||
77 | (__u8 *)peer_addr); | ||
78 | foc->len = TCP_FASTOPEN_COOKIE_SIZE; | ||
79 | } | ||
80 | rcu_read_unlock(); | ||
81 | } | ||
5 | 82 | ||
6 | static int __init tcp_fastopen_init(void) | 83 | static int __init tcp_fastopen_init(void) |
7 | { | 84 | { |
85 | __u8 key[TCP_FASTOPEN_KEY_LENGTH]; | ||
86 | |||
87 | get_random_bytes(key, sizeof(key)); | ||
88 | tcp_fastopen_reset_cipher(key, sizeof(key)); | ||
8 | return 0; | 89 | return 0; |
9 | } | 90 | } |
10 | 91 | ||
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ce4ffe9ed556..d47d5fe8f3f0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -378,7 +378,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk) | |||
378 | /* 4. Try to fixup all. It is made immediately after connection enters | 378 | /* 4. Try to fixup all. It is made immediately after connection enters |
379 | * established state. | 379 | * established state. |
380 | */ | 380 | */ |
381 | static void tcp_init_buffer_space(struct sock *sk) | 381 | void tcp_init_buffer_space(struct sock *sk) |
382 | { | 382 | { |
383 | struct tcp_sock *tp = tcp_sk(sk); | 383 | struct tcp_sock *tp = tcp_sk(sk); |
384 | int maxwin; | 384 | int maxwin; |
@@ -4038,7 +4038,7 @@ static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq) | |||
4038 | } | 4038 | } |
4039 | 4039 | ||
4040 | /* When we get a reset we do this. */ | 4040 | /* When we get a reset we do this. */ |
4041 | static void tcp_reset(struct sock *sk) | 4041 | void tcp_reset(struct sock *sk) |
4042 | { | 4042 | { |
4043 | /* We want the right error as BSD sees it (and indeed as we do). */ | 4043 | /* We want the right error as BSD sees it (and indeed as we do). */ |
4044 | switch (sk->sk_state) { | 4044 | switch (sk->sk_state) { |