diff options
author | Ursula Braun <ubraun@linux.vnet.ibm.com> | 2017-10-25 05:01:45 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-10-26 05:00:29 -0400 |
commit | 60e2a7780793bae0debc275a9ccd57f7da0cf195 (patch) | |
tree | 8b65c6c4eb3194718df692952e1b5d547c53de2f /net | |
parent | 145686baab68e9c7594fe9269f47da479c25ad79 (diff) |
tcp: TCP experimental option for SMC
The SMC protocol [1] relies on the use of a new TCP experimental
option [2, 3]. With this option, SMC capabilities are exchanged
between peers during the TCP three-way handshake. This patch adds
support for this experimental option to TCP.
References:
[1] SMC-R Informational RFC: http://www.rfc-editor.org/info/rfc7609
[2] Shared Use of TCP Experimental Options RFC 6994:
https://tools.ietf.org/rfc/rfc6994.txt
[3] IANA ExID SMCR:
http://www.iana.org/assignments/tcp-parameters/tcp-parameters.xhtml#tcp-exids
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/ipv4/tcp.c | 6 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 35 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 19 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 63 |
4 files changed, 120 insertions, 3 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8f36277e82e9..f6e1c00e300e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -270,6 +270,7 @@ | |||
270 | #include <linux/time.h> | 270 | #include <linux/time.h> |
271 | #include <linux/slab.h> | 271 | #include <linux/slab.h> |
272 | #include <linux/errqueue.h> | 272 | #include <linux/errqueue.h> |
273 | #include <linux/static_key.h> | ||
273 | 274 | ||
274 | #include <net/icmp.h> | 275 | #include <net/icmp.h> |
275 | #include <net/inet_common.h> | 276 | #include <net/inet_common.h> |
@@ -302,6 +303,11 @@ EXPORT_SYMBOL(sysctl_tcp_wmem); | |||
302 | atomic_long_t tcp_memory_allocated; /* Current allocated memory. */ | 303 | atomic_long_t tcp_memory_allocated; /* Current allocated memory. */ |
303 | EXPORT_SYMBOL(tcp_memory_allocated); | 304 | EXPORT_SYMBOL(tcp_memory_allocated); |
304 | 305 | ||
306 | #if IS_ENABLED(CONFIG_SMC) | ||
307 | DEFINE_STATIC_KEY_FALSE(tcp_have_smc); | ||
308 | EXPORT_SYMBOL(tcp_have_smc); | ||
309 | #endif | ||
310 | |||
305 | /* | 311 | /* |
306 | * Current number of TCP sockets. | 312 | * Current number of TCP sockets. |
307 | */ | 313 | */ |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 893286db4623..337f6011528a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -76,6 +76,8 @@ | |||
76 | #include <asm/unaligned.h> | 76 | #include <asm/unaligned.h> |
77 | #include <linux/errqueue.h> | 77 | #include <linux/errqueue.h> |
78 | #include <trace/events/tcp.h> | 78 | #include <trace/events/tcp.h> |
79 | #include <linux/unaligned/access_ok.h> | ||
80 | #include <linux/static_key.h> | ||
79 | 81 | ||
80 | int sysctl_tcp_fack __read_mostly; | 82 | int sysctl_tcp_fack __read_mostly; |
81 | int sysctl_tcp_max_reordering __read_mostly = 300; | 83 | int sysctl_tcp_max_reordering __read_mostly = 300; |
@@ -3737,6 +3739,21 @@ static void tcp_parse_fastopen_option(int len, const unsigned char *cookie, | |||
3737 | foc->exp = exp_opt; | 3739 | foc->exp = exp_opt; |
3738 | } | 3740 | } |
3739 | 3741 | ||
3742 | static void smc_parse_options(const struct tcphdr *th, | ||
3743 | struct tcp_options_received *opt_rx, | ||
3744 | const unsigned char *ptr, | ||
3745 | int opsize) | ||
3746 | { | ||
3747 | #if IS_ENABLED(CONFIG_SMC) | ||
3748 | if (static_branch_unlikely(&tcp_have_smc)) { | ||
3749 | if (th->syn && !(opsize & 1) && | ||
3750 | opsize >= TCPOLEN_EXP_SMC_BASE && | ||
3751 | get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC) | ||
3752 | opt_rx->smc_ok = 1; | ||
3753 | } | ||
3754 | #endif | ||
3755 | } | ||
3756 | |||
3740 | /* Look for tcp options. Normally only called on SYN and SYNACK packets. | 3757 | /* Look for tcp options. Normally only called on SYN and SYNACK packets. |
3741 | * But, this can also be called on packets in the established flow when | 3758 | * But, this can also be called on packets in the established flow when |
3742 | * the fast version below fails. | 3759 | * the fast version below fails. |
@@ -3844,6 +3861,9 @@ void tcp_parse_options(const struct net *net, | |||
3844 | tcp_parse_fastopen_option(opsize - | 3861 | tcp_parse_fastopen_option(opsize - |
3845 | TCPOLEN_EXP_FASTOPEN_BASE, | 3862 | TCPOLEN_EXP_FASTOPEN_BASE, |
3846 | ptr + 2, th->syn, foc, true); | 3863 | ptr + 2, th->syn, foc, true); |
3864 | else | ||
3865 | smc_parse_options(th, opt_rx, ptr, | ||
3866 | opsize); | ||
3847 | break; | 3867 | break; |
3848 | 3868 | ||
3849 | } | 3869 | } |
@@ -5598,6 +5618,16 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, | |||
5598 | return false; | 5618 | return false; |
5599 | } | 5619 | } |
5600 | 5620 | ||
5621 | static void smc_check_reset_syn(struct tcp_sock *tp) | ||
5622 | { | ||
5623 | #if IS_ENABLED(CONFIG_SMC) | ||
5624 | if (static_branch_unlikely(&tcp_have_smc)) { | ||
5625 | if (tp->syn_smc && !tp->rx_opt.smc_ok) | ||
5626 | tp->syn_smc = 0; | ||
5627 | } | ||
5628 | #endif | ||
5629 | } | ||
5630 | |||
5601 | static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | 5631 | static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, |
5602 | const struct tcphdr *th) | 5632 | const struct tcphdr *th) |
5603 | { | 5633 | { |
@@ -5704,6 +5734,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |||
5704 | * is initialized. */ | 5734 | * is initialized. */ |
5705 | tp->copied_seq = tp->rcv_nxt; | 5735 | tp->copied_seq = tp->rcv_nxt; |
5706 | 5736 | ||
5737 | smc_check_reset_syn(tp); | ||
5738 | |||
5707 | smp_mb(); | 5739 | smp_mb(); |
5708 | 5740 | ||
5709 | tcp_finish_connect(sk, skb); | 5741 | tcp_finish_connect(sk, skb); |
@@ -6157,6 +6189,9 @@ static void tcp_openreq_init(struct request_sock *req, | |||
6157 | ireq->ir_rmt_port = tcp_hdr(skb)->source; | 6189 | ireq->ir_rmt_port = tcp_hdr(skb)->source; |
6158 | ireq->ir_num = ntohs(tcp_hdr(skb)->dest); | 6190 | ireq->ir_num = ntohs(tcp_hdr(skb)->dest); |
6159 | ireq->ir_mark = inet_request_mark(sk, skb); | 6191 | ireq->ir_mark = inet_request_mark(sk, skb); |
6192 | #if IS_ENABLED(CONFIG_SMC) | ||
6193 | ireq->smc_ok = rx_opt->smc_ok; | ||
6194 | #endif | ||
6160 | } | 6195 | } |
6161 | 6196 | ||
6162 | struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, | 6197 | struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, |
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index a952357054f4..056009f1c14f 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
24 | #include <linux/sysctl.h> | 24 | #include <linux/sysctl.h> |
25 | #include <linux/workqueue.h> | 25 | #include <linux/workqueue.h> |
26 | #include <linux/static_key.h> | ||
26 | #include <net/tcp.h> | 27 | #include <net/tcp.h> |
27 | #include <net/inet_common.h> | 28 | #include <net/inet_common.h> |
28 | #include <net/xfrm.h> | 29 | #include <net/xfrm.h> |
@@ -416,6 +417,21 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) | |||
416 | } | 417 | } |
417 | EXPORT_SYMBOL_GPL(tcp_ca_openreq_child); | 418 | EXPORT_SYMBOL_GPL(tcp_ca_openreq_child); |
418 | 419 | ||
420 | static void smc_check_reset_syn_req(struct tcp_sock *oldtp, | ||
421 | struct request_sock *req, | ||
422 | struct tcp_sock *newtp) | ||
423 | { | ||
424 | #if IS_ENABLED(CONFIG_SMC) | ||
425 | struct inet_request_sock *ireq; | ||
426 | |||
427 | if (static_branch_unlikely(&tcp_have_smc)) { | ||
428 | ireq = inet_rsk(req); | ||
429 | if (oldtp->syn_smc && !ireq->smc_ok) | ||
430 | newtp->syn_smc = 0; | ||
431 | } | ||
432 | #endif | ||
433 | } | ||
434 | |||
419 | /* This is not only more efficient than what we used to do, it eliminates | 435 | /* This is not only more efficient than what we used to do, it eliminates |
420 | * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM | 436 | * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM |
421 | * | 437 | * |
@@ -433,6 +449,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, | |||
433 | struct tcp_request_sock *treq = tcp_rsk(req); | 449 | struct tcp_request_sock *treq = tcp_rsk(req); |
434 | struct inet_connection_sock *newicsk = inet_csk(newsk); | 450 | struct inet_connection_sock *newicsk = inet_csk(newsk); |
435 | struct tcp_sock *newtp = tcp_sk(newsk); | 451 | struct tcp_sock *newtp = tcp_sk(newsk); |
452 | struct tcp_sock *oldtp = tcp_sk(sk); | ||
453 | |||
454 | smc_check_reset_syn_req(oldtp, req, newtp); | ||
436 | 455 | ||
437 | /* Now setup tcp_sock */ | 456 | /* Now setup tcp_sock */ |
438 | newtp->pred_flags = 0; | 457 | newtp->pred_flags = 0; |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1f01f4c9c738..c8fc512e0bbb 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <linux/compiler.h> | 41 | #include <linux/compiler.h> |
42 | #include <linux/gfp.h> | 42 | #include <linux/gfp.h> |
43 | #include <linux/module.h> | 43 | #include <linux/module.h> |
44 | #include <linux/static_key.h> | ||
44 | 45 | ||
45 | #include <trace/events/tcp.h> | 46 | #include <trace/events/tcp.h> |
46 | 47 | ||
@@ -422,6 +423,22 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp) | |||
422 | #define OPTION_MD5 (1 << 2) | 423 | #define OPTION_MD5 (1 << 2) |
423 | #define OPTION_WSCALE (1 << 3) | 424 | #define OPTION_WSCALE (1 << 3) |
424 | #define OPTION_FAST_OPEN_COOKIE (1 << 8) | 425 | #define OPTION_FAST_OPEN_COOKIE (1 << 8) |
426 | #define OPTION_SMC (1 << 9) | ||
427 | |||
428 | static void smc_options_write(__be32 *ptr, u16 *options) | ||
429 | { | ||
430 | #if IS_ENABLED(CONFIG_SMC) | ||
431 | if (static_branch_unlikely(&tcp_have_smc)) { | ||
432 | if (unlikely(OPTION_SMC & *options)) { | ||
433 | *ptr++ = htonl((TCPOPT_NOP << 24) | | ||
434 | (TCPOPT_NOP << 16) | | ||
435 | (TCPOPT_EXP << 8) | | ||
436 | (TCPOLEN_EXP_SMC_BASE)); | ||
437 | *ptr++ = htonl(TCPOPT_SMC_MAGIC); | ||
438 | } | ||
439 | } | ||
440 | #endif | ||
441 | } | ||
425 | 442 | ||
426 | struct tcp_out_options { | 443 | struct tcp_out_options { |
427 | u16 options; /* bit field of OPTION_* */ | 444 | u16 options; /* bit field of OPTION_* */ |
@@ -540,6 +557,41 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, | |||
540 | } | 557 | } |
541 | ptr += (len + 3) >> 2; | 558 | ptr += (len + 3) >> 2; |
542 | } | 559 | } |
560 | |||
561 | smc_options_write(ptr, &options); | ||
562 | } | ||
563 | |||
564 | static void smc_set_option(const struct tcp_sock *tp, | ||
565 | struct tcp_out_options *opts, | ||
566 | unsigned int *remaining) | ||
567 | { | ||
568 | #if IS_ENABLED(CONFIG_SMC) | ||
569 | if (static_branch_unlikely(&tcp_have_smc)) { | ||
570 | if (tp->syn_smc) { | ||
571 | if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) { | ||
572 | opts->options |= OPTION_SMC; | ||
573 | *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED; | ||
574 | } | ||
575 | } | ||
576 | } | ||
577 | #endif | ||
578 | } | ||
579 | |||
580 | static void smc_set_option_cond(const struct tcp_sock *tp, | ||
581 | const struct inet_request_sock *ireq, | ||
582 | struct tcp_out_options *opts, | ||
583 | unsigned int *remaining) | ||
584 | { | ||
585 | #if IS_ENABLED(CONFIG_SMC) | ||
586 | if (static_branch_unlikely(&tcp_have_smc)) { | ||
587 | if (tp->syn_smc && ireq->smc_ok) { | ||
588 | if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) { | ||
589 | opts->options |= OPTION_SMC; | ||
590 | *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED; | ||
591 | } | ||
592 | } | ||
593 | } | ||
594 | #endif | ||
543 | } | 595 | } |
544 | 596 | ||
545 | /* Compute TCP options for SYN packets. This is not the final | 597 | /* Compute TCP options for SYN packets. This is not the final |
@@ -607,11 +659,14 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, | |||
607 | } | 659 | } |
608 | } | 660 | } |
609 | 661 | ||
662 | smc_set_option(tp, opts, &remaining); | ||
663 | |||
610 | return MAX_TCP_OPTION_SPACE - remaining; | 664 | return MAX_TCP_OPTION_SPACE - remaining; |
611 | } | 665 | } |
612 | 666 | ||
613 | /* Set up TCP options for SYN-ACKs. */ | 667 | /* Set up TCP options for SYN-ACKs. */ |
614 | static unsigned int tcp_synack_options(struct request_sock *req, | 668 | static unsigned int tcp_synack_options(const struct sock *sk, |
669 | struct request_sock *req, | ||
615 | unsigned int mss, struct sk_buff *skb, | 670 | unsigned int mss, struct sk_buff *skb, |
616 | struct tcp_out_options *opts, | 671 | struct tcp_out_options *opts, |
617 | const struct tcp_md5sig_key *md5, | 672 | const struct tcp_md5sig_key *md5, |
@@ -667,6 +722,8 @@ static unsigned int tcp_synack_options(struct request_sock *req, | |||
667 | } | 722 | } |
668 | } | 723 | } |
669 | 724 | ||
725 | smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining); | ||
726 | |||
670 | return MAX_TCP_OPTION_SPACE - remaining; | 727 | return MAX_TCP_OPTION_SPACE - remaining; |
671 | } | 728 | } |
672 | 729 | ||
@@ -3195,8 +3252,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, | |||
3195 | md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req)); | 3252 | md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req)); |
3196 | #endif | 3253 | #endif |
3197 | skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4); | 3254 | skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4); |
3198 | tcp_header_size = tcp_synack_options(req, mss, skb, &opts, md5, foc) + | 3255 | tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5, |
3199 | sizeof(*th); | 3256 | foc) + sizeof(*th); |
3200 | 3257 | ||
3201 | skb_push(skb, tcp_header_size); | 3258 | skb_push(skb, tcp_header_size); |
3202 | skb_reset_transport_header(skb); | 3259 | skb_reset_transport_header(skb); |