aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
author: Ursula Braun <ubraun@linux.vnet.ibm.com> 2017-10-25 05:01:45 -0400
committer: David S. Miller <davem@davemloft.net> 2017-10-26 05:00:29 -0400
commit: 60e2a7780793bae0debc275a9ccd57f7da0cf195 (patch)
tree: 8b65c6c4eb3194718df692952e1b5d547c53de2f /net
parent: 145686baab68e9c7594fe9269f47da479c25ad79 (diff)
tcp: TCP experimental option for SMC
The SMC protocol [1] relies on the use of a new TCP experimental option [2, 3]. With this option, SMC capabilities are exchanged between peers during the TCP three-way handshake. This patch adds support for this experimental option to TCP.

References:
[1] SMC-R Informational RFC: http://www.rfc-editor.org/info/rfc7609
[2] Shared Use of TCP Experimental Options RFC 6994: https://tools.ietf.org/rfc/rfc6994.txt
[3] IANA ExID SMCR: http://www.iana.org/assignments/tcp-parameters/tcp-parameters.xhtml#tcp-exids

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- net/ipv4/tcp.c | 6
-rw-r--r-- net/ipv4/tcp_input.c | 35
-rw-r--r-- net/ipv4/tcp_minisocks.c | 19
-rw-r--r-- net/ipv4/tcp_output.c | 63
4 files changed, 120 insertions, 3 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8f36277e82e9..f6e1c00e300e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -270,6 +270,7 @@
270#include <linux/time.h> 270#include <linux/time.h>
271#include <linux/slab.h> 271#include <linux/slab.h>
272#include <linux/errqueue.h> 272#include <linux/errqueue.h>
273#include <linux/static_key.h>
273 274
274#include <net/icmp.h> 275#include <net/icmp.h>
275#include <net/inet_common.h> 276#include <net/inet_common.h>
@@ -302,6 +303,11 @@ EXPORT_SYMBOL(sysctl_tcp_wmem);
302atomic_long_t tcp_memory_allocated; /* Current allocated memory. */ 303atomic_long_t tcp_memory_allocated; /* Current allocated memory. */
303EXPORT_SYMBOL(tcp_memory_allocated); 304EXPORT_SYMBOL(tcp_memory_allocated);
304 305
306#if IS_ENABLED(CONFIG_SMC)
307DEFINE_STATIC_KEY_FALSE(tcp_have_smc);
308EXPORT_SYMBOL(tcp_have_smc);
309#endif
310
305/* 311/*
306 * Current number of TCP sockets. 312 * Current number of TCP sockets.
307 */ 313 */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 893286db4623..337f6011528a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -76,6 +76,8 @@
76#include <asm/unaligned.h> 76#include <asm/unaligned.h>
77#include <linux/errqueue.h> 77#include <linux/errqueue.h>
78#include <trace/events/tcp.h> 78#include <trace/events/tcp.h>
79#include <linux/unaligned/access_ok.h>
80#include <linux/static_key.h>
79 81
80int sysctl_tcp_fack __read_mostly; 82int sysctl_tcp_fack __read_mostly;
81int sysctl_tcp_max_reordering __read_mostly = 300; 83int sysctl_tcp_max_reordering __read_mostly = 300;
@@ -3737,6 +3739,21 @@ static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
3737 foc->exp = exp_opt; 3739 foc->exp = exp_opt;
3738} 3740}
3739 3741
3742static void smc_parse_options(const struct tcphdr *th,
3743 struct tcp_options_received *opt_rx,
3744 const unsigned char *ptr,
3745 int opsize)
3746{
3747#if IS_ENABLED(CONFIG_SMC)
3748 if (static_branch_unlikely(&tcp_have_smc)) {
3749 if (th->syn && !(opsize & 1) &&
3750 opsize >= TCPOLEN_EXP_SMC_BASE &&
3751 get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC)
3752 opt_rx->smc_ok = 1;
3753 }
3754#endif
3755}
3756
3740/* Look for tcp options. Normally only called on SYN and SYNACK packets. 3757/* Look for tcp options. Normally only called on SYN and SYNACK packets.
3741 * But, this can also be called on packets in the established flow when 3758 * But, this can also be called on packets in the established flow when
3742 * the fast version below fails. 3759 * the fast version below fails.
@@ -3844,6 +3861,9 @@ void tcp_parse_options(const struct net *net,
3844 tcp_parse_fastopen_option(opsize - 3861 tcp_parse_fastopen_option(opsize -
3845 TCPOLEN_EXP_FASTOPEN_BASE, 3862 TCPOLEN_EXP_FASTOPEN_BASE,
3846 ptr + 2, th->syn, foc, true); 3863 ptr + 2, th->syn, foc, true);
3864 else
3865 smc_parse_options(th, opt_rx, ptr,
3866 opsize);
3847 break; 3867 break;
3848 3868
3849 } 3869 }
@@ -5598,6 +5618,16 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
5598 return false; 5618 return false;
5599} 5619}
5600 5620
5621static void smc_check_reset_syn(struct tcp_sock *tp)
5622{
5623#if IS_ENABLED(CONFIG_SMC)
5624 if (static_branch_unlikely(&tcp_have_smc)) {
5625 if (tp->syn_smc && !tp->rx_opt.smc_ok)
5626 tp->syn_smc = 0;
5627 }
5628#endif
5629}
5630
5601static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, 5631static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5602 const struct tcphdr *th) 5632 const struct tcphdr *th)
5603{ 5633{
@@ -5704,6 +5734,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5704 * is initialized. */ 5734 * is initialized. */
5705 tp->copied_seq = tp->rcv_nxt; 5735 tp->copied_seq = tp->rcv_nxt;
5706 5736
5737 smc_check_reset_syn(tp);
5738
5707 smp_mb(); 5739 smp_mb();
5708 5740
5709 tcp_finish_connect(sk, skb); 5741 tcp_finish_connect(sk, skb);
@@ -6157,6 +6189,9 @@ static void tcp_openreq_init(struct request_sock *req,
6157 ireq->ir_rmt_port = tcp_hdr(skb)->source; 6189 ireq->ir_rmt_port = tcp_hdr(skb)->source;
6158 ireq->ir_num = ntohs(tcp_hdr(skb)->dest); 6190 ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
6159 ireq->ir_mark = inet_request_mark(sk, skb); 6191 ireq->ir_mark = inet_request_mark(sk, skb);
6192#if IS_ENABLED(CONFIG_SMC)
6193 ireq->smc_ok = rx_opt->smc_ok;
6194#endif
6160} 6195}
6161 6196
6162struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, 6197struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index a952357054f4..056009f1c14f 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -23,6 +23,7 @@
23#include <linux/slab.h> 23#include <linux/slab.h>
24#include <linux/sysctl.h> 24#include <linux/sysctl.h>
25#include <linux/workqueue.h> 25#include <linux/workqueue.h>
26#include <linux/static_key.h>
26#include <net/tcp.h> 27#include <net/tcp.h>
27#include <net/inet_common.h> 28#include <net/inet_common.h>
28#include <net/xfrm.h> 29#include <net/xfrm.h>
@@ -416,6 +417,21 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst)
416} 417}
417EXPORT_SYMBOL_GPL(tcp_ca_openreq_child); 418EXPORT_SYMBOL_GPL(tcp_ca_openreq_child);
418 419
420static void smc_check_reset_syn_req(struct tcp_sock *oldtp,
421 struct request_sock *req,
422 struct tcp_sock *newtp)
423{
424#if IS_ENABLED(CONFIG_SMC)
425 struct inet_request_sock *ireq;
426
427 if (static_branch_unlikely(&tcp_have_smc)) {
428 ireq = inet_rsk(req);
429 if (oldtp->syn_smc && !ireq->smc_ok)
430 newtp->syn_smc = 0;
431 }
432#endif
433}
434
419/* This is not only more efficient than what we used to do, it eliminates 435/* This is not only more efficient than what we used to do, it eliminates
420 * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM 436 * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM
421 * 437 *
@@ -433,6 +449,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
433 struct tcp_request_sock *treq = tcp_rsk(req); 449 struct tcp_request_sock *treq = tcp_rsk(req);
434 struct inet_connection_sock *newicsk = inet_csk(newsk); 450 struct inet_connection_sock *newicsk = inet_csk(newsk);
435 struct tcp_sock *newtp = tcp_sk(newsk); 451 struct tcp_sock *newtp = tcp_sk(newsk);
452 struct tcp_sock *oldtp = tcp_sk(sk);
453
454 smc_check_reset_syn_req(oldtp, req, newtp);
436 455
437 /* Now setup tcp_sock */ 456 /* Now setup tcp_sock */
438 newtp->pred_flags = 0; 457 newtp->pred_flags = 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 1f01f4c9c738..c8fc512e0bbb 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -41,6 +41,7 @@
41#include <linux/compiler.h> 41#include <linux/compiler.h>
42#include <linux/gfp.h> 42#include <linux/gfp.h>
43#include <linux/module.h> 43#include <linux/module.h>
44#include <linux/static_key.h>
44 45
45#include <trace/events/tcp.h> 46#include <trace/events/tcp.h>
46 47
@@ -422,6 +423,22 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
422#define OPTION_MD5 (1 << 2) 423#define OPTION_MD5 (1 << 2)
423#define OPTION_WSCALE (1 << 3) 424#define OPTION_WSCALE (1 << 3)
424#define OPTION_FAST_OPEN_COOKIE (1 << 8) 425#define OPTION_FAST_OPEN_COOKIE (1 << 8)
426#define OPTION_SMC (1 << 9)
427
428static void smc_options_write(__be32 *ptr, u16 *options)
429{
430#if IS_ENABLED(CONFIG_SMC)
431 if (static_branch_unlikely(&tcp_have_smc)) {
432 if (unlikely(OPTION_SMC & *options)) {
433 *ptr++ = htonl((TCPOPT_NOP << 24) |
434 (TCPOPT_NOP << 16) |
435 (TCPOPT_EXP << 8) |
436 (TCPOLEN_EXP_SMC_BASE));
437 *ptr++ = htonl(TCPOPT_SMC_MAGIC);
438 }
439 }
440#endif
441}
425 442
426struct tcp_out_options { 443struct tcp_out_options {
427 u16 options; /* bit field of OPTION_* */ 444 u16 options; /* bit field of OPTION_* */
@@ -540,6 +557,41 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
540 } 557 }
541 ptr += (len + 3) >> 2; 558 ptr += (len + 3) >> 2;
542 } 559 }
560
561 smc_options_write(ptr, &options);
562}
563
564static void smc_set_option(const struct tcp_sock *tp,
565 struct tcp_out_options *opts,
566 unsigned int *remaining)
567{
568#if IS_ENABLED(CONFIG_SMC)
569 if (static_branch_unlikely(&tcp_have_smc)) {
570 if (tp->syn_smc) {
571 if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
572 opts->options |= OPTION_SMC;
573 *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
574 }
575 }
576 }
577#endif
578}
579
580static void smc_set_option_cond(const struct tcp_sock *tp,
581 const struct inet_request_sock *ireq,
582 struct tcp_out_options *opts,
583 unsigned int *remaining)
584{
585#if IS_ENABLED(CONFIG_SMC)
586 if (static_branch_unlikely(&tcp_have_smc)) {
587 if (tp->syn_smc && ireq->smc_ok) {
588 if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
589 opts->options |= OPTION_SMC;
590 *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
591 }
592 }
593 }
594#endif
543} 595}
544 596
545/* Compute TCP options for SYN packets. This is not the final 597/* Compute TCP options for SYN packets. This is not the final
@@ -607,11 +659,14 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
607 } 659 }
608 } 660 }
609 661
662 smc_set_option(tp, opts, &remaining);
663
610 return MAX_TCP_OPTION_SPACE - remaining; 664 return MAX_TCP_OPTION_SPACE - remaining;
611} 665}
612 666
613/* Set up TCP options for SYN-ACKs. */ 667/* Set up TCP options for SYN-ACKs. */
614static unsigned int tcp_synack_options(struct request_sock *req, 668static unsigned int tcp_synack_options(const struct sock *sk,
669 struct request_sock *req,
615 unsigned int mss, struct sk_buff *skb, 670 unsigned int mss, struct sk_buff *skb,
616 struct tcp_out_options *opts, 671 struct tcp_out_options *opts,
617 const struct tcp_md5sig_key *md5, 672 const struct tcp_md5sig_key *md5,
@@ -667,6 +722,8 @@ static unsigned int tcp_synack_options(struct request_sock *req,
667 } 722 }
668 } 723 }
669 724
725 smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
726
670 return MAX_TCP_OPTION_SPACE - remaining; 727 return MAX_TCP_OPTION_SPACE - remaining;
671} 728}
672 729
@@ -3195,8 +3252,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
3195 md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req)); 3252 md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
3196#endif 3253#endif
3197 skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4); 3254 skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
3198 tcp_header_size = tcp_synack_options(req, mss, skb, &opts, md5, foc) + 3255 tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
3199 sizeof(*th); 3256 foc) + sizeof(*th);
3200 3257
3201 skb_push(skb, tcp_header_size); 3258 skb_push(skb, tcp_header_size);
3202 skb_reset_transport_header(skb); 3259 skb_reset_transport_header(skb);